import qemu-kvm-8.2.0-6.el9

i9c-beta changed/i9c-beta/qemu-kvm-8.2.0-6.el9
MSVSphere Packaging Team 10 months ago
parent 83a513bac2
commit d2fffb6dfb

2
.gitignore vendored

@ -1 +1 @@
SOURCES/qemu-8.0.0.tar.xz
SOURCES/qemu-8.2.0.tar.xz

@ -1 +1 @@
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz
1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz

@ -1,4 +1,4 @@
From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001
From faae70a870156f86a5cf55ca967b15d7612941ff Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 26 May 2021 10:56:02 +0200
Subject: Initial redhat build
@ -13,7 +13,7 @@ several issues are fixed in QEMU tree:
We disable make check due to issues with some of the tests.
This rebase is based on qemu-kvm-7.2.0-14.el9
This rebase is based on qemu-kvm-8.1.0-5.el9
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
@ -50,32 +50,39 @@ Rebase changes (7.0.0):
- Change permissions on installing tests/Makefile.include
- Remove ssh block driver
Rebase changes (7.1.0 rc0):
Rebase changes (7.1.0):
- --disable-vnc-png renamed to --disable-png (upstream)
- removed --disable-vhost-vsock and --disable-vhost-scsi
- capstone submodule removed
- Temporary include capstone build
Rebase changes (7.2.0 rc0):
Rebase changes (7.2.0):
- Switch --enable-slirp=system to --enable-slirp
Rebaes changes (7.2.0 rc2):
- Added new configure options (blkio and sndio, both disabled)
Rebase changes (7.2.0):
- Fix SRPM name generation to work on Fedora 37
- Switch back to system meson
Rebase changes (8.0.0-rc1):
Rebase changes (8.0.0):
- use enable-dtrace-backands instead of enable-dtrace-backend
- Removed qemu virtiofsd bits
Rebase changes (8.0.0-rc2):
- test/check-block.sh removed (upstream)
Rebase changes (8.0.0-rc3):
- Add new --disable-* options for configure
Rebase changes (8.1.0):
- qmp-spec.txt installed by make
- Removed --meson configure option
- Add --disable-pypi
- Removed --with-git and -with-gitsubmodules
- Renamed --disable-pypi to --disable-downloads
- Minor updates in README.tests
Rebase changes (8.2.0):
- Removed --disable-hax (upstream)
- Added --disable-plugins configure option
- Fixing frh.py strings
Merged patches (6.0.0):
- 605758c902 Limit build on Power to qemu-img and qemu-ga only
@ -168,24 +175,35 @@ Merged patches (7.0.0):
- d46d2710b2 spec: Obsolete old usb redir subpackage
- 6f52a50b68 spec: Obsolete ssh driver
Merged patches (7.2.0 rc4):
Merged patches (7.2.0):
- 8c6834feb6 Remove opengl display device subpackages (C9S MR 124)
- 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124)
Merged patches (8.0.0-rc1):
Merged patches (8.0.0):
- 7754f6ba78 Minor packaging fixes
- 401af56187 spec: Disable VDUSE
Merged patches (8.1.0):
- 0c2306676f Enable Linux io_uring
- b7fa6426d5 Enable libblkio block drivers
- 19f6d7a6f4 Fix virtio-blk-vhost-vdpa typo in spec file
- f356cae88f spec: Build DBUS display
- 77b763efd5 Provide elf2dmp binary in qemu-tools
Merged patches (8.2.0):
- cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
.distro/Makefile | 100 +
.distro/Makefile.common | 41 +
.distro/Makefile.common | 42 +
.distro/README.tests | 39 +
.distro/modules-load.conf | 4 +
.distro/qemu-guest-agent.service | 1 -
.distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++
.distro/qemu-kvm.spec.template | 4909 +++++++++++++++++++++++
.distro/rpminspect.yaml | 6 +-
.distro/scripts/extract_build_cmd.py | 12 +
.distro/scripts/frh.py | 4 +-
.distro/scripts/process-patches.sh | 4 +
.gitignore | 1 +
README.systemtap | 43 +
@ -193,7 +211,7 @@ Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
scripts/systemtap/conf.d/qemu_kvm.conf | 4 +
scripts/systemtap/script.d/qemu_kvm.stp | 1 +
ui/vnc-auth-sasl.c | 2 +-
15 files changed, 4784 insertions(+), 4 deletions(-)
16 files changed, 5168 insertions(+), 6 deletions(-)
create mode 100644 .distro/Makefile
create mode 100644 .distro/Makefile.common
create mode 100644 .distro/README.tests
@ -296,5 +314,5 @@ index 47fdae5b21..2a950caa2a 100644
if (saslErr != SASL_OK) {
error_setg(errp, "Failed to initialize SASL auth: %s",
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001
From 048067b4618ba1fa7c8c517185d4cd3a675eba72 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 7 Dec 2022 03:05:48 -0500
Subject: Enable/disable devices for RHEL
@ -22,21 +22,31 @@ Rebase notes (7.0.0):
- Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG
- Removed upstream devices
Rebase notes (7.1.0 rc0):
Rebase notes (7.1.0):
- Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs
- Added CONFIG_CXL and CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64
Rebase notes (7.1.0 rc3):
- Added CONFIG_VHOST_USER_FS option (all archs)
Rebase notes (7.2.0 rc20):
Rebase notes (7.2.0):
- Removed disabling a15mpcore.c as no longer needed
Rebase notes (8.0.0-rc1):
Rebase notes (8.0.0):
- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9
- Inlude qemu/error-report.h in hw/display/cirrus_vga.c
- Change virtiofsd dependency version
Rebase notes (8.1.0):
- Added CONFIG_PCIE_PCI_BRIDGE for x86_64
- Disabling tcg cpus for aarch64
- Disable CONFIG_ARM_V7M and remove related hack
- Moved aarch64 tcg cpu disabling from arm machine type commit
Rebase notes (8.2.0):
- Disabled new a710 arm64 tcg cpu
- No longer needed hack for removal of i2c-echo
- Disable new neoverse-v2
- Removed CONFIG_OPENGL from x86_64 config file
Merged patches (6.1.0):
- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak
- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI
@ -53,32 +63,47 @@ Merged patches (7.0.0):
- fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64
- c9e68ea451 Enable SGX -- RH Only
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk)
- 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM
- 1bf372717a Enable virtio-iommu-pci on aarch64
- ae3f269458 Enable virtio-iommu-pci on x86_64
Merged patches (8.1.0):
- 8173d2eaba Disable unwanted new devices
Merged patches (8.2.0):
- b29f66431f Enable igb on x86_64
---
.distro/qemu-kvm.spec.template | 18 +--
.../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++
.../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++
configs/devices/rh-virtio.mak | 10 ++
.../s390x-softmmu/s390x-rh-devices.mak | 18 +++
.../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++
hw/arm/meson.build | 2 +-
.../x86_64-softmmu/x86_64-rh-devices.mak | 110 ++++++++++++++++++
hw/arm/virt.c | 2 +
hw/block/fdc.c | 10 ++
hw/cpu/meson.build | 3 +-
hw/display/cirrus_vga.c | 7 +-
hw/cxl/meson.build | 3 +-
hw/display/cirrus_vga.c | 4 +
hw/ide/piix.c | 5 +-
hw/ide/qdev.c | 9 ++
hw/input/pckbd.c | 2 +
hw/net/e1000.c | 2 +
hw/ppc/spapr_cpu_core.c | 2 +
hw/usb/meson.build | 2 +-
target/arm/cpu_tcg.c | 10 ++
hw/virtio/meson.build | 5 +-
target/arm/arm-qmp-cmds.c | 2 +
target/arm/cpu.c | 4 +
target/arm/cpu.h | 3 +
target/arm/cpu64.c | 12 +-
target/arm/tcg/cpu32.c | 2 +
target/arm/tcg/cpu64.c | 8 ++
target/ppc/cpu-models.c | 9 ++
target/s390x/cpu_models_sysemu.c | 3 +
target/s390x/kvm/kvm.c | 8 ++
19 files changed, 285 insertions(+), 13 deletions(-)
tests/qtest/arm-cpu-features.c | 4 +
28 files changed, 323 insertions(+), 15 deletions(-)
create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
create mode 100644 configs/devices/rh-virtio.mak
@ -87,7 +112,7 @@ Merged patches (7.1.0 rc0):
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
new file mode 100644
index 0000000000..720ec0cb57
index 0000000000..aec1831199
--- /dev/null
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
@@ -0,0 +1,41 @@
@ -97,12 +122,12 @@ index 0000000000..720ec0cb57
+CONFIG_ARM_GICV3_TCG=y
+CONFIG_ARM_GIC=y
+CONFIG_ARM_SMMUV3=y
+CONFIG_ARM_V7M=y
+CONFIG_ARM_VIRT=y
+CONFIG_CXL=y
+CONFIG_CXL_MEM_DEVICE=y
+CONFIG_EDID=y
+CONFIG_PCIE_PORT=y
+CONFIG_PCIE_PCI_BRIDGE=y
+CONFIG_PCI_DEVICES=y
+CONFIG_PCI_TESTDEV=y
+CONFIG_PFLASH_CFI01=y
@ -217,10 +242,10 @@ index 0000000000..69a799adbd
+CONFIG_VHOST_USER_FS=y
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
new file mode 100644
index 0000000000..668b2d0e18
index 0000000000..ce5be73633
--- /dev/null
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -0,0 +1,109 @@
@@ -0,0 +1,110 @@
+include ../rh-virtio.mak
+
+CONFIG_ACPI=y
@ -259,6 +284,7 @@ index 0000000000..668b2d0e18
+CONFIG_IDE_PCI=y
+CONFIG_IDE_PIIX=y
+CONFIG_IDE_QDEV=y
+CONFIG_IGB_PCI_EXPRESS=y
+CONFIG_IOAPIC=y
+CONFIG_IOH3420=y
+CONFIG_ISA_BUS=y
@ -268,7 +294,6 @@ index 0000000000..668b2d0e18
+CONFIG_MC146818RTC=y
+CONFIG_MEM_DEVICE=y
+CONFIG_NVDIMM=y
+CONFIG_OPENGL=y
+CONFIG_PAM=y
+CONFIG_PC=y
+CONFIG_PCI=y
@ -282,6 +307,7 @@ index 0000000000..668b2d0e18
+CONFIG_PCSPK=y
+CONFIG_PC_ACPI=y
+CONFIG_PC_PCI=y
+CONFIG_PCIE_PCI_BRIDGE=y
+CONFIG_PFLASH_CFI01=y
+CONFIG_PVPANIC_ISA=y
+CONFIG_PXB=y
@ -330,19 +356,26 @@ index 0000000000..668b2d0e18
+CONFIG_VHOST_VSOCK=y
+CONFIG_VHOST_USER_VSOCK=y
+CONFIG_VHOST_USER_FS=y
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index b545ba0e4f..a41a16cba7 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
+#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c'))
arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c'))
arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index be2856c018..af9ea4dd1c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -205,6 +205,7 @@ static const int a15irqmap[] = {
};
static const char *valid_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
#ifdef CONFIG_TCG
ARM_CPU_TYPE_NAME("cortex-a7"),
ARM_CPU_TYPE_NAME("cortex-a15"),
@@ -219,6 +220,7 @@ static const char *valid_cpus[] = {
ARM_CPU_TYPE_NAME("neoverse-n2"),
#endif
ARM_CPU_TYPE_NAME("cortex-a53"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("cortex-a57"),
ARM_CPU_TYPE_NAME("host"),
ARM_CPU_TYPE_NAME("max"),
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index d7cc4d3ec1..12d0a60905 100644
--- a/hw/block/fdc.c
@ -372,18 +405,32 @@ index d7cc4d3ec1..12d0a60905 100644
error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'");
return;
diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build
index e37490074f..4431e3731c 100644
index 6d319947ca..91962fd863 100644
--- a/hw/cpu/meson.build
+++ b/hw/cpu/meson.build
@@ -1,4 +1,5 @@
-softmmu_ss.add(files('core.c', 'cluster.c'))
+#softmmu_ss.add(files('core.c', 'cluster.c'))
+softmmu_ss.add(files('core.c'))
-system_ss.add(files('core.c', 'cluster.c'))
+#system_ss.add(files('core.c', 'cluster.c'))
+system_ss.add(files('core.c'))
softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
index ea0aebf6e3..6878f06974 100644
--- a/hw/cxl/meson.build
+++ b/hw/cxl/meson.build
@@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL',
'cxl-host.c',
'cxl-cdat.c',
'cxl-events.c',
- 'switch-mailbox-cci.c',
+# Disabled for 8.2.0 rebase for RHEL 9.4.0
+# 'switch-mailbox-cci.c',
),
if_false: files(
'cxl-host-stubs.c',
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index b80f98b6c4..cbde6a8f15 100644
index b80f98b6c4..0370cf8a64 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -36,6 +36,7 @@
@ -394,31 +441,21 @@ index b80f98b6c4..cbde6a8f15 100644
#include "sysemu/reset.h"
#include "qapi/error.h"
#include "trace.h"
@@ -47,6 +48,7 @@
#include "qom/object.h"
#include "ui/console.h"
+
/*
* TODO:
* - destination write mask support not complete (bits 5..7)
@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
@@ -2946,6 +2947,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
int16_t device_id = pc->device_id;
- /*
+ warn_report("'cirrus-vga' is deprecated, "
+ "please use a different VGA card instead");
+ warn_report("'cirrus-vga' is deprecated, "
+ "please use a different VGA card instead");
+
+ /*
/*
* Follow real hardware, cirrus card emulated has 4 MB video memory.
* Also accept 8 MB/16 MB for backward compatibility.
*/
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 41d60921e3..a4af45b4e8 100644
index 4e5e12935f..03ca06bb17 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
@@ -190,7 +190,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
@ -428,7 +465,7 @@ index 41d60921e3..a4af45b4e8 100644
}
static const TypeInfo piix3_ide_info = {
@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
@@ -214,6 +215,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->hotpluggable = false;
@ -437,6 +474,52 @@ index 41d60921e3..a4af45b4e8 100644
}
static const TypeInfo piix4_ide_info = {
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 1b3b4da01d..454bfa5783 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
ide_dev_initfn(dev, IDE_CD, errp);
}
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static void ide_cf_realize(IDEDevice *dev, Error **errp)
{
ide_dev_initfn(dev, IDE_CFATA, errp);
}
+#endif
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
.class_init = ide_cd_class_init,
};
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static Property ide_cf_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
.instance_size = sizeof(IDEDrive),
.class_init = ide_cf_class_init,
};
+#endif
static void ide_device_class_init(ObjectClass *klass, void *data)
{
@@ -396,7 +402,10 @@ static void ide_register_types(void)
type_register_static(&ide_bus_info);
type_register_static(&ide_hd_info);
type_register_static(&ide_cd_info);
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
type_register_static(&ide_cf_info);
+#endif
type_register_static(&ide_device_type_info);
}
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index b92b63bedc..3b6235dde6 100644
--- a/hw/input/pckbd.c
@ -451,10 +534,10 @@ index b92b63bedc..3b6235dde6 100644
static const TypeInfo i8042_info = {
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 23d660619f..b75c9aa799 100644
index 8ffe1077f1..b3dfeeca4f 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = {
@@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -462,7 +545,7 @@ index 23d660619f..b75c9aa799 100644
{
.name = "e1000-82544gc",
.device_id = E1000_DEV_ID_82544GC_COPPER,
@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = {
@@ -1758,6 +1759,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -471,10 +554,10 @@ index 23d660619f..b75c9aa799 100644
static void e1000_register_types(void)
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 8a4861f45a..fcb5dfe792 100644
index 91fae56573..33e0c8724c 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
@@ -386,10 +386,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
.instance_size = sizeof(SpaprCpuCore),
.class_size = sizeof(SpaprCpuCoreClass),
},
@ -488,10 +571,10 @@ index 8a4861f45a..fcb5dfe792 100644
DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 599dc24f0d..905a994c3a 100644
index e94149ebde..4a8adbf3dc 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade
@@ -52,7 +52,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader
if cacard.found()
usbsmartcard_ss = ss.source_set()
usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD',
@ -500,86 +583,206 @@ index 599dc24f0d..905a994c3a 100644
hw_usb_modules += {'smartcard': usbsmartcard_ss}
endif
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index df0c45e523..c154a4dcf2 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index c0055a7832..12e1d6c67e 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -17,8 +17,9 @@ if have_vhost
if have_vhost_user
# fixme - this really should be generic
specific_virtio_ss.add(files('vhost-user.c'))
- system_virtio_ss.add(files('vhost-user-device.c'))
- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c'))
+# Disabled for 8.2.0 rebase for RHEL 9.4.0
+# system_virtio_ss.add(files('vhost-user-device.c'))
+# system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c'))
endif
if have_vhost_vdpa
system_virtio_ss.add(files('vhost-vdpa.c'))
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index b53d5efe13..64989a02d1 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index efb22a87f9..a32521ada9 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2524,6 +2524,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data)
acc->info = data;
cc->gdb_core_xml_file = "arm-core.xml";
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index a0282e0d28..7e0f0dfea7 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -34,6 +34,8 @@
#define KVM_HAVE_MCE_INJECTION 1
#endif
+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'"
+
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
@@ -1120,6 +1122,7 @@ typedef struct ARMCPUInfo {
const char *name;
void (*initfn)(Object *obj);
void (*class_init)(ObjectClass *oc, void *data);
+ const char *deprecation_note;
} ARMCPUInfo;
/**
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 1e9c6c85ae..10be900803 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj)
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -704,6 +705,7 @@ static void aarch64_a53_initfn(Object *obj)
cpu->gic_pribits = 5;
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
+#endif
static void aarch64_host_initfn(Object *obj)
{
@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
@@ -742,8 +744,11 @@ static void aarch64_max_initfn(Object *obj)
}
static const ARMCPUInfo aarch64_cpus[] = {
- { .name = "cortex-a57", .initfn = aarch64_a57_initfn },
+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn,
+ .deprecation_note = RHEL_CPU_DEPRECATION },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
+#endif /* disabled for RHEL */
{ .name = "max", .initfn = aarch64_max_initfn },
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
{ .name = "host", .initfn = aarch64_host_initfn },
@@ -815,8 +820,13 @@ static void aarch64_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
acc->info = data;
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
index d9e0e2a4dd..c5c639a6ea 100644
--- a/target/arm/tcg/cpu32.c
+++ b/target/arm/tcg/cpu32.c
@@ -98,6 +98,7 @@ void aa32_max_features(ARMCPU *cpu)
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_a7_initfn(Object *obj)
#if !defined(CONFIG_USER_ONLY)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
@@ -1189,3 +1190,4 @@ static void arm_tcg_cpu_register_types(void)
type_init(arm_tcg_cpu_register_types)
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
+#endif /* disabled for RHEL */
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index fcda99e158..bd5a993ff8 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -29,6 +29,7 @@
#include "cpu-features.h"
#include "cpregs.h"
static void cortex_a15_initfn(Object *obj)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
unsigned cachesize)
{
@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
@@ -134,6 +135,7 @@ static void aarch64_a35_initfn(Object *obj)
/* These values are the same with A53/A57/A72. */
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
+#endif
static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
@@ -223,6 +225,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name,
static Property arm_cpu_lpa2_property =
DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true);
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_m0_initfn(Object *obj)
static void aarch64_a55_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
@@ -1065,6 +1068,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj)
aarch64_add_pauth_properties(obj);
aarch64_add_sve_properties(obj);
}
+#endif /* disabled for RHEL */
+#endif
#ifndef TARGET_AARCH64
/*
@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
* -cpu max: a CPU with as many features enabled as our emulation supports.
@@ -1259,6 +1263,7 @@ void aarch64_max_tcg_initfn(Object *obj)
qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property);
}
static const ARMCPUInfo arm_tcg_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
+#endif /* disabled for RHEL */
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
static const ARMCPUInfo aarch64_cpus[] = {
{ .name = "cortex-a35", .initfn = aarch64_a35_initfn },
{ .name = "cortex-a55", .initfn = aarch64_a55_initfn },
@@ -1270,14 +1275,17 @@ static const ARMCPUInfo aarch64_cpus[] = {
{ .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn },
{ .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn },
};
+#endif
static void aarch64_cpu_register_types(void)
{
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
+#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
size_t i;
for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) {
aarch64_cpu_register(&aarch64_cpus[i]);
}
+#endif
}
type_init(aarch64_cpu_register_types)
diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index 912b037c63..cd3ff700ac 100644
index 7dbb47de64..69fddb05bc 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -66,6 +66,7 @@
@ -609,7 +812,7 @@ index 912b037c63..cd3ff700ac 100644
POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7,
"POWER7 v2.3")
POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7,
@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
@@ -898,12 +902,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "7447a", "7447a_v1.2" },
{ "7457a", "7457a_v1.2" },
{ "apollo7pm", "7457a_v1.0" },
@ -625,7 +828,7 @@ index 912b037c63..cd3ff700ac 100644
{ "power7", "power7_v2.3" },
{ "power7+", "power7+_v2.1" },
{ "power8e", "power8e_v2.1" },
@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
@@ -913,12 +920,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "power10", "power10_v2.0" },
#endif
@ -655,10 +858,10 @@ index 63981bf36b..87a4480c05 100644
/* detect missing features if any to properly report them */
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 3ac7ec9acf..97da1a6424 100644
index 33ab3551f4..912e493951 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp)
@@ -2567,6 +2567,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp)
error_setg(errp, "KVM doesn't support CPU models");
return;
}
@ -673,6 +876,37 @@ index 3ac7ec9acf..97da1a6424 100644
prop.cpuid = s390_cpuid_from_cpu_model(model);
prop.ibc = s390_ibc_from_cpu_model(model);
/* configure cpu features indicated via STFL(e) */
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index a8a4c668ad..2458cc527c 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -451,8 +451,10 @@ static void test_query_cpu_model_expansion(const void *data)
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
/* Test expected feature presence/absence for some cpu types */
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "cortex-a15", "pmu");
assert_has_not_feature(qts, "cortex-a15", "aarch64");
+#endif /* disabled for RHEL */
/* Enabling and disabling pmu should always work. */
assert_has_feature_enabled(qts, "max", "pmu");
@@ -469,6 +471,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "a64fx", "pmu");
assert_has_feature_enabled(qts, "a64fx", "aarch64");
/*
@@ -481,6 +484,7 @@ static void test_query_cpu_model_expansion(const void *data)
"{ 'sve384': true }");
assert_error(qts, "a64fx", "cannot enable sve640",
"{ 'sve640': true }");
+#endif /* disabled for RHEL */
sve_tests_default(qts, "max");
pauth_tests_default(qts, "max");
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001
From d9ff466c980d219ebf230ea24becce294c196f1f Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 11 Jan 2019 09:54:45 +0100
Subject: Machine type related general changes
@ -19,10 +19,13 @@ Rebase notes (7.0.0):
- Remove downstream changes leftovers in hw/rtc/mc146818rtc.c
- Remove unnecessary change in hw/usb/hcd-uhci.c
Rebase notes (7.1.0 rc0):
Rebase notes (7.1.0):
- Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit
- Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit
Rebase notes (8.1.0):
- Do not modify unused vga-isa.c
Merged patches (6.1.0):
- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4
- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3
@ -40,39 +43,45 @@ Merged patches (7.0.0):
- ef5afcc86d Fix virtio-net-pci* "vectors" compat
- 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk)
- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk)
Merged patches (7.2.0 rc0):
Merged patches (7.2.0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
Merged patches (8.0.0):
- 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type
- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors
- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors
Merged patches (8.1.0):
- bd5d81d286 Add RHEL 9.2.0 compat structure (general part)
- 1165e24c6b hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
Merged patches (8.2.0):
- 4ee284aca9 Add machine types compat bits. (partial)
---
hw/acpi/piix4.c | 2 +-
hw/arm/virt.c | 2 +-
hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++
hw/display/vga-isa.c | 2 +-
hw/core/machine.c | 267 +++++++++++++++++++++++++++++++++++
hw/i386/pc_piix.c | 2 +
hw/i386/pc_q35.c | 2 +
hw/net/rtl8139.c | 4 +-
hw/smbios/smbios.c | 46 ++++++-
hw/smbios/smbios.c | 46 +++++-
hw/timer/i8254_common.c | 2 +-
hw/usb/hcd-xhci-pci.c | 59 ++++++---
hw/usb/hcd-xhci-pci.c | 59 ++++++--
hw/usb/hcd-xhci-pci.h | 1 +
include/hw/boards.h | 31 +++++
include/hw/boards.h | 40 ++++++
include/hw/firmware/smbios.h | 5 +-
include/hw/i386/pc.h | 3 +
14 files changed, 367 insertions(+), 23 deletions(-)
13 files changed, 413 insertions(+), 22 deletions(-)
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 63d2113b86..a24b9aac92 100644
index dd523d2e4c..5050c0ba97 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
@@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
static const VMStateDescription vmstate_acpi = {
.name = "piix4_pm",
.version_id = 3,
@ -82,10 +91,10 @@ index 63d2113b86..a24b9aac92 100644
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState),
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ac626b3bef..4a6e89c7bc 100644
index af9ea4dd1c..62f0f7d4d6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms)
@@ -1638,7 +1638,7 @@ static void virt_build_smbios(VirtMachineState *vms)
smbios_set_defaults("QEMU", product,
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
@ -95,10 +104,10 @@ index ac626b3bef..4a6e89c7bc 100644
/* build the array of physical mem area from base_memmap */
mem_array.address = vms->memmap[VIRT_MEM].base;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index cd13b8b0a3..5aa567fad3 100644
index 0c17398141..446601ee30 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = {
@@ -57,6 +57,273 @@ GlobalProperty hw_compat_7_2[] = {
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
@ -108,6 +117,44 @@ index cd13b8b0a3..5aa567fad3 100644
+const char *rhel_old_machine_deprecation =
+ "machine types for previous major releases are deprecated";
+
+GlobalProperty hw_compat_rhel_9_4[] = {
+ /* hw_compat_rhel_9_4 from hw_compat_8_0 */
+ { TYPE_VIRTIO_NET, "host_uso", "off"},
+ /* hw_compat_rhel_9_4 from hw_compat_8_0 */
+ { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+ /* hw_compat_rhel_9_4 from hw_compat_8_0 */
+ { TYPE_VIRTIO_NET, "guest_uso6", "off"},
+ /* hw_compat_rhel_9_4 from hw_compat_8_1 */
+ { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" },
+ /* hw_compat_rhel_9_4 from hw_compat_8_1 */
+ { "ramfb", "x-migrate", "off" },
+ /* hw_compat_rhel_9_4 from hw_compat_8_1 */
+ { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" },
+ /* hw_compat_rhel_9_4 from hw_compat_8_1 */
+ { "igb", "x-pcie-flr-init", "off" },
+};
+const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4);
+
+GlobalProperty hw_compat_rhel_9_3[] = {
+ /* hw_compat_rhel_9_3 from hw_compat_8_0 */
+ { "migration", "multifd-flush-after-each-section", "on"},
+ /* hw_compat_rhel_9_3 from hw_compat_8_0 */
+ { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+};
+const size_t hw_compat_rhel_9_3_len = G_N_ELEMENTS(hw_compat_rhel_9_3);
+
+GlobalProperty hw_compat_rhel_9_2[] = {
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "e1000e", "migrate-timadj", "off" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "virtio-mem", "x-early-migration", "false" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "migration", "x-preempt-pre-7-2", "true" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
+};
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
+
+/*
+ * Mostly the same as hw_compat_7_0
+ */
@ -334,25 +381,12 @@ index cd13b8b0a3..5aa567fad3 100644
GlobalProperty hw_compat_7_1[] = {
{ "virtio-device", "queue_reset", "false" },
{ "virtio-rng-pci", "vectors", "0" },
diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c
index 2a5437d803..0db2c2b2a1 100644
--- a/hw/display/vga-isa.c
+++ b/hw/display/vga-isa.c
@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp)
}
static Property vga_isa_properties[] = {
- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8),
+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 30eedd62a3..14a794081e 100644
index eace854335..2a9f465619 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
@@ -238,6 +238,8 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", mc->desc,
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
+ pcmc->smbios_stream_product,
@ -361,11 +395,11 @@ index 30eedd62a3..14a794081e 100644
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 797ba347fd..dc0ba5f9e7 100644
index 4f3e5412f6..912cb0c0dc 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
@@ -206,6 +206,8 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", mc->desc,
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
+ pcmc->smbios_stream_product,
@ -374,10 +408,10 @@ index 797ba347fd..dc0ba5f9e7 100644
}
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 5a5aaf868d..3d473d5869 100644
index 4af8c66266..7dc12907ab 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque)
@@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque)
static const VMStateDescription vmstate_rtl8139 = {
.name = "rtl8139",
@ -386,7 +420,7 @@ index 5a5aaf868d..3d473d5869 100644
.minimum_version_id = 3,
.post_load = rtl8139_post_load,
.pre_save = rtl8139_pre_save,
@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = {
@@ -3250,7 +3250,9 @@ static const VMStateDescription vmstate_rtl8139 = {
VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State),
VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State),
VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State),
@ -397,7 +431,7 @@ index 5a5aaf868d..3d473d5869 100644
VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State),
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index d2007e70fb..319eae9e9d 100644
index 2a90601ac5..7bde23e59d 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -58,6 +58,9 @@ static bool smbios_legacy = true;
@ -419,7 +453,7 @@ index d2007e70fb..319eae9e9d 100644
SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer);
SMBIOS_TABLE_SET_STR(2, product_str, type2.product);
@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
@@ -985,7 +988,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
void smbios_set_defaults(const char *manufacturer, const char *product,
const char *version, bool legacy_mode,
@ -431,7 +465,7 @@ index d2007e70fb..319eae9e9d 100644
{
smbios_have_defaults = true;
smbios_legacy = legacy_mode;
@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
@@ -1006,11 +1012,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
g_free(smbios_entries);
}
@ -479,10 +513,10 @@ index d2007e70fb..319eae9e9d 100644
SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer);
SMBIOS_SET_DEFAULT(type3.version, version);
diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c
index 050875b497..32935da46c 100644
index b25da448c8..0331e84398 100644
--- a/hw/timer/i8254_common.c
+++ b/hw/timer/i8254_common.c
@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = {
@@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = {
.pre_save = pit_dispatch_pre_save,
.post_load = pit_dispatch_post_load,
.fields = (VMStateField[]) {
@ -603,13 +637,22 @@ index 08f70ce97c..1be7527c1b 100644
#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6fbbfd56c8..c5a965d27f 100644
index da85f86efb..4a21eddbf9 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len;
@@ -503,4 +503,44 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_9_4[];
+extern const size_t hw_compat_rhel_9_4_len;
+
+extern GlobalProperty hw_compat_rhel_9_3[];
+extern const size_t hw_compat_rhel_9_3_len;
+
+extern GlobalProperty hw_compat_rhel_9_2[];
+extern const size_t hw_compat_rhel_9_2_len;
+
+extern GlobalProperty hw_compat_rhel_9_1[];
+extern const size_t hw_compat_rhel_9_1_len;
+
@ -659,13 +702,13 @@ index 7f3259a630..d24b3ccd32 100644
void smbios_get_tables(MachineState *ms,
const struct smbios_phys_mem_area *mem_array,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 8206d5405a..908a275736 100644
index a10ceeabbf..037942d233 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -111,6 +111,9 @@ struct PCMachineClass {
bool smbios_defaults;
@@ -113,6 +113,9 @@ struct PCMachineClass {
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
SmbiosEntryPointType default_smbios_ep_type;
+ /* New fields needed for Windows HardwareID-6 matching */
+ const char *smbios_stream_product;
+ const char *smbios_stream_version;
@ -673,5 +716,5 @@ index 8206d5405a..908a275736 100644
/* RAM / address space compat: */
bool gigabyte_align;
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001
From 23f614ab0b79ec1c6f65a7f0d6993bfdfc53fd23 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 12:53:31 +0200
Subject: Add aarch64 machine types
@ -17,18 +17,19 @@ Rebase notes (7.0.0):
- Added dtb-kaslr-seed option
- Set no_tcg_lpa2 to true
Rebase notes (7.1.0 rc0):
Rebase notes (7.1.0):
- replace dtb_kaslr_seed by dtb_randomness
Rebase notes (7.1.0 rc3):
- Updated dtb_randomness comment
Rebase notes (7.2.0 rc0):
Rebase notes (7.2.0):
- Disabled cortex-a35
Rebase notes (8.0.0-rc1):
Rebase notes (8.0.0):
- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c
Rebase notes (8.1.0):
- Added setting default_nic
Merged patches (6.2.0):
- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type
- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type
@ -46,33 +47,33 @@ Merged patches (7.0.0):
- f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option
- b6fca85f4a hw/arm/virt: Fix missing initialization in instance/class_init()
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types
- e9c0a70664 target/arm: deprecate named CPU models
Merged patches (7.2.0 rc0):
Merged patches (7.2.0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
Merged patches (8.0.0):
- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type
- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props
Merged patches (8.1.0):
- bd5d81d286 Add RHEL 9.2.0 compat structure (arm part)
- c07f666086 hw/arm/virt: Validate cluster and NUMA node boundary for RHEL machines
Merged patches (8.2.0):
- 4ee284aca9 Add machine types compat bits. (partial)
---
hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 8 ++
target/arm/arm-qmp-cmds.c | 2 +
target/arm/cpu-qom.h | 1 +
target/arm/cpu.c | 5 +
target/arm/cpu.h | 2 +
target/arm/cpu64.c | 16 ++-
target/arm/cpu_tcg.c | 12 +-
tests/qtest/arm-cpu-features.c | 6 +
9 files changed, 289 insertions(+), 14 deletions(-)
hw/arm/virt.c | 250 +++++++++++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 8 ++
2 files changed, 257 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4a6e89c7bc..1ae1654be5 100644
index 62f0f7d4d6..c541efee5e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -81,6 +81,7 @@
@@ -82,6 +82,7 @@
#include "hw/char/pl011.h"
#include "qemu/guest-random.h"
@ -80,13 +81,12 @@ index 4a6e89c7bc..1ae1654be5 100644
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -107,7 +108,48 @@
@@ -108,7 +109,48 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
-
+#endif /* disabled for RHEL */
+
+#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
+ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
+ void *data) \
@ -130,28 +130,7 @@ index 4a6e89c7bc..1ae1654be5 100644
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -204,16 +246,20 @@ static const int a15irqmap[] = {
};
static const char *valid_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
ARM_CPU_TYPE_NAME("cortex-a7"),
ARM_CPU_TYPE_NAME("cortex-a15"),
ARM_CPU_TYPE_NAME("cortex-a35"),
ARM_CPU_TYPE_NAME("cortex-a53"),
ARM_CPU_TYPE_NAME("cortex-a55"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("cortex-a57"),
+#if 0 /* Disabled for Red Hat Enterprise Linux */
ARM_CPU_TYPE_NAME("cortex-a72"),
ARM_CPU_TYPE_NAME("cortex-a76"),
ARM_CPU_TYPE_NAME("a64fx"),
ARM_CPU_TYPE_NAME("neoverse-n1"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("host"),
ARM_CPU_TYPE_NAME("max"),
};
@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine)
@@ -2341,6 +2383,7 @@ static void machvirt_init(MachineState *machine)
qemu_add_machine_init_done_notifier(&vms->machine_done);
}
@ -159,7 +138,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static bool virt_get_secure(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
@@ -2368,6 +2411,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
vms->virt = value;
}
@ -167,16 +146,15 @@ index 4a6e89c7bc..1ae1654be5 100644
static bool virt_get_highmem(Object *obj, Error **errp)
{
@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
@@ -2383,6 +2427,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
-
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_compact_highmem(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
@@ -2438,7 +2483,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
vms->highmem_mmio = value;
}
@ -185,7 +163,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static bool virt_get_its(Object *obj, Error **errp)
{
@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
@@ -2454,6 +2499,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
vms->its = value;
}
@ -193,7 +171,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static bool virt_get_dtb_randomness(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
@@ -2467,6 +2513,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
vms->dtb_randomness = value;
}
@ -201,7 +179,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static char *virt_get_oem_id(Object *obj, Error **errp)
{
@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
@@ -2550,6 +2597,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
vms->ras = value;
}
@ -209,7 +187,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static bool virt_get_mte(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
@@ -2563,6 +2611,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
vms->mte = value;
}
@ -217,7 +195,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static char *virt_get_gic_version(Object *obj, Error **errp)
{
@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
@@ -2935,6 +2984,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
return fixed_ipa ? 0 : requested_pa_size;
}
@ -225,7 +203,7 @@ index 4a6e89c7bc..1ae1654be5 100644
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc)
@@ -3405,3 +3455,201 @@ static void virt_machine_2_6_options(MachineClass *mc)
vmc->no_pmu = true;
}
DEFINE_VIRT_MACHINE(2, 6)
@ -263,7 +241,10 @@ index 4a6e89c7bc..1ae1654be5 100644
+ mc->smp_props.clusters_supported = true;
+ mc->auto_enable_numa_with_memhp = true;
+ mc->auto_enable_numa_with_memdev = true;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
+ mc->default_ram_id = "mach-virt.ram";
+ mc->default_nic = "virtio-net-pci";
+
+ object_class_property_add(oc, "acpi", "OnOffAuto",
+ virt_get_acpi, virt_set_acpi,
@ -404,6 +385,9 @@ index 4a6e89c7bc..1ae1654be5 100644
+static void rhel920_virt_options(MachineClass *mc)
+{
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+
@ -422,10 +406,10 @@ index 4a6e89c7bc..1ae1654be5 100644
+}
+DEFINE_RHEL_MACHINE(9, 0, 0)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index e1ddbea96b..81c2363a40 100644
index f69239850e..7b8abe5645 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -187,9 +187,17 @@ struct VirtMachineState {
@@ -177,9 +177,17 @@ struct VirtMachineState {
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
@ -443,270 +427,6 @@ index e1ddbea96b..81c2363a40 100644
void virt_acpi_setup(VirtMachineState *vms);
bool virt_is_acpi_enabled(VirtMachineState *vms);
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index c8fa524002..3aa089abf3 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 514c22ced9..f789173451 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo {
const char *name;
void (*initfn)(Object *obj);
void (*class_init)(ObjectClass *oc, void *data);
+ const char *deprecation_note;
} ARMCPUInfo;
void arm_cpu_register(const ARMCPUInfo *info);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5182ed0c91..6740a8b940 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
acc->info = data;
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index c097cae988..829d4a2328 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -34,6 +34,8 @@
#define KVM_HAVE_MCE_INJECTION 1
#endif
+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'"
+
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 0fb07cc7b6..47459627fb 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -31,6 +31,7 @@
#include "hw/qdev-properties.h"
#include "internals.h"
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void aarch64_a35_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj)
/* These values are the same with A53/A57/A72. */
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
+#endif /* disabled for RHEL */
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
{
@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj)
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
define_neoverse_n1_cp_reginfo(cpu);
}
+#endif /* disabled for RHEL */
static void aarch64_host_initfn(Object *obj)
{
@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj)
}
static const ARMCPUInfo aarch64_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a35", .initfn = aarch64_a35_initfn },
- { .name = "cortex-a57", .initfn = aarch64_a57_initfn },
+#endif /* disabled for RHEL */
+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn,
+ .deprecation_note = RHEL_CPU_DEPRECATION },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
{ .name = "cortex-a55", .initfn = aarch64_a55_initfn },
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
{ .name = "cortex-a76", .initfn = aarch64_a76_initfn },
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
{ .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn },
+#endif /* disabled for RHEL */
{ .name = "max", .initfn = aarch64_max_initfn },
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
{ .name = "host", .initfn = aarch64_host_initfn },
@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
acc->info = data;
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index c154a4dcf2..f29425b656 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
}
#endif /* !CONFIG_USER_ONLY */
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-#if 0 /* Disabled for Red Hat Enterprise Linux */
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
-#endif /* disabled for RHEL */
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
-#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
-#endif /* disabled for RHEL */
static void cortex_a15_initfn(Object *obj)
{
@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
-#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
/*
@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
-#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
-#endif /* disabled for RHEL */
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
-#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
-#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void)
type_init(arm_tcg_cpu_register_types)
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
+#endif /* disabled for RHEL */
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 1cb08138ad..834497dfec 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data)
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
/* Test expected feature presence/absence for some cpu types */
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "cortex-a15", "pmu");
assert_has_not_feature(qts, "cortex-a15", "aarch64");
+#endif /* disabled for RHEL */
/* Enabling and disabling pmu should always work. */
assert_has_feature_enabled(qts, "max", "pmu");
@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "a64fx", "pmu");
assert_has_feature_enabled(qts, "a64fx", "aarch64");
/*
@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data)
"{ 'sve384': true }");
assert_error(qts, "a64fx", "cannot enable sve640",
"{ 'sve640': true }");
+#endif /* disabled for RHEL */
sve_tests_default(qts, "max");
pauth_tests_default(qts, "max");
@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
QDict *resp;
char *error;
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_error(qts, "cortex-a15",
"We cannot guarantee the CPU type 'cortex-a15' works "
"with KVM on this host", NULL);
+#endif /* disabled for RHEL */
assert_has_feature_enabled(qts, "host", "aarch64");
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001
From d03cff85f5f1b69b1a66011ebaa974ece81d31bc Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:27:13 +0200
Subject: Add ppc64 machine types
@ -20,7 +20,7 @@ Merged patches (6.1.0):
- af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes)
- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes)
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL
---
hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++
@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0):
8 files changed, 314 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4921198b9d..e24b3e22e3 100644
index df09aa9d6a..ff459e1a46 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
@@ -1689,6 +1689,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
pef_kvm_reset(machine->cgs, &error_fatal);
spapr_caps_apply(spapr);
@ -47,7 +47,7 @@ index 4921198b9d..e24b3e22e3 100644
first_ppc_cpu = POWERPC_CPU(first_cpu);
if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
@@ -3397,6 +3400,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
spapr->host_serial = g_strdup(value);
}
@ -68,7 +68,7 @@ index 4921198b9d..e24b3e22e3 100644
static void spapr_instance_init(Object *obj)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj)
@@ -3475,6 +3492,12 @@ static void spapr_instance_init(Object *obj)
spapr_get_host_serial, spapr_set_host_serial);
object_property_set_description(obj, "host-serial",
"Host serial number to advertise in guest device tree");
@ -81,7 +81,7 @@ index 4921198b9d..e24b3e22e3 100644
}
static void spapr_machine_finalizefn(Object *obj)
@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
@@ -4734,6 +4757,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
vmc->client_architecture_support = spapr_vof_client_architecture_support;
vmc->quiesce = spapr_vof_quiesce;
vmc->setprop = spapr_vof_setprop;
@ -89,15 +89,15 @@ index 4921198b9d..e24b3e22e3 100644
}
static const TypeInfo spapr_machine_info = {
@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
@@ -4785,6 +4809,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
} \
type_init(spapr_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/*
* pseries-8.0
* pseries-8.2
*/
@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
@@ -4967,6 +4992,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
}
DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
@ -105,8 +105,8 @@ index 4921198b9d..e24b3e22e3 100644
/*
* pseries-4.0
@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
*nv2atsd = 0;
@@ -4982,6 +5008,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
}
return true;
}
+
@ -114,7 +114,7 @@ index 4921198b9d..e24b3e22e3 100644
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
@@ -5306,6 +5334,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
@ -337,7 +337,7 @@ index 4921198b9d..e24b3e22e3 100644
static void spapr_machine_register_types(void)
{
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index fcb5dfe792..ab8fb5bf62 100644
index 33e0c8724c..9d01663f43 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -25,6 +25,7 @@
@ -348,7 +348,7 @@ index fcb5dfe792..ab8fb5bf62 100644
static void spapr_reset_vcpu(PowerPCCPU *cpu)
{
@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
@@ -261,6 +262,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
CPUState *cs = CPU(cpu);
@ -356,7 +356,7 @@ index fcb5dfe792..ab8fb5bf62 100644
if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
return false;
@@ -270,6 +272,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
@@ -277,6 +279,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
/* Set time-base frequency to 512 MHz. vhyp must be set first. */
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644
qdev_unrealize(DEVICE(cpu));
return false;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5c8aabd444..04489d5808 100644
index e91791a1a9..1951d8a2a0 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -155,6 +155,7 @@ struct SpaprMachineClass {
@@ -154,6 +154,7 @@ struct SpaprMachineClass {
bool pre_5_2_numa_associativity;
bool pre_6_2_numa_affinity;
@ -386,7 +386,7 @@ index 5c8aabd444..04489d5808 100644
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
@@ -257,6 +258,9 @@ struct SpaprMachineState {
@@ -256,6 +257,9 @@ struct SpaprMachineState {
/* Set by -boot */
char *boot_device;
@ -397,7 +397,7 @@ index 5c8aabd444..04489d5808 100644
char *kvm_type;
char *host_model;
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index 7949a24f5a..f207a9ba01 100644
index ebef2cccec..ff2c00c60e 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr)
@ -422,10 +422,10 @@ index 7949a24f5a..f207a9ba01 100644
const CompatInfo *compat = compat_by_pvr(compat_pvr);
const CompatInfo *min = compat_by_pvr(min_compat_pvr);
diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index cd3ff700ac..1cb49c8087 100644
index 69fddb05bc..64a05aaef3 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -746,6 +746,7 @@
@@ -748,6 +748,7 @@
/* PowerPC CPU aliases */
PowerPCCPUAlias ppc_cpu_aliases[] = {
@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644
{ "405cr", "405crc" },
{ "405gp", "405gpd" },
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 557d736dab..6646ec1c27 100644
index f8101ffa29..e799a2bee6 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
@@ -1635,6 +1635,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
/* Compatibility modes */
#if defined(TARGET_PPC64)
@ -446,18 +446,18 @@ index 557d736dab..6646ec1c27 100644
uint32_t min_compat_pvr, uint32_t max_compat_pvr);
bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 78f6fc50cd..68d06c3f8f 100644
index 9b1abe2fc4..56f1c46e8e 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
@@ -89,6 +89,7 @@ static int cap_large_decr;
static int cap_fwnmi;
static int cap_rpt_invalidate;
static int cap_ail_mode_3;
+static int cap_ppc_secure_guest;
static uint32_t debug_inst_opcode;
@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
@@ -141,6 +142,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
kvmppc_get_cpu_characteristics(s);
cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@ -465,8 +465,8 @@ index 78f6fc50cd..68d06c3f8f 100644
cap_large_decr = kvmppc_get_dec_bits();
cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
/*
@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
return cap_rpt_invalidate;
@@ -2579,6 +2581,16 @@ bool kvmppc_supports_ail_3(void)
return cap_ail_mode_3;
}
+bool kvmppc_has_cap_secure_guest(void)
@ -482,7 +482,7 @@ index 78f6fc50cd..68d06c3f8f 100644
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
uint32_t host_pvr = mfpvr();
@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
@@ -2979,3 +2991,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
void kvm_arch_accel_class_init(ObjectClass *oc)
{
}
@ -502,27 +502,27 @@ index 78f6fc50cd..68d06c3f8f 100644
+ }
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 5fd9753953..b5ebfe2be0 100644
index 1975fb5ee6..d1017f98be 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
@@ -46,6 +46,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl);
+void kvmppc_svm_allow(Error **errp);
#ifndef CONFIG_USER_ONLY
bool kvmppc_spapr_use_multitce(void);
int kvmppc_spapr_enable_inkernel_multitce(void);
@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
@@ -79,6 +80,8 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
int kvmppc_has_cap_rpt_invalidate(void);
bool kvmppc_supports_ail_3(void);
int kvmppc_enable_hwrng(void);
+bool kvmppc_has_cap_secure_guest(void);
+int kvmppc_enable_cap_secure_guest(void);
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
@@ -427,6 +430,16 @@ static inline bool kvmppc_supports_ail_3(void)
return false;
}
@ -540,5 +540,5 @@ index 5fd9753953..b5ebfe2be0 100644
{
return -1;
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001
From 3623043d4a923bf9f541d439c76e7874cf0fa81d Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:47:32 +0200
Subject: Add s390x machine types
@ -8,7 +8,7 @@ Adding changes to add RHEL machine types for s390x architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
Rebase changes (7.1.0 rc0):
Rebase changes (7.1.0):
- Moved adding rhel_old_machine_deprecation variable to general machine types commit
Merged patches (6.1.0):
@ -23,52 +23,74 @@ Merged patches (7.0.0):
- 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x
- dcc64971bf RHEL: mark old machine types as deprecated (partialy)
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk)
- c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0
- 5bcf8d874c target/s390x: deprecate CPUs older than z14
Merged patches (7.2.0 rc0):
Merged patches (7.2.0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
Merged patches (8.0.0):
- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type
- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0
Merged patches (8.1.0):
- bd5d81d286 Add RHEL 9.2.0 compat structure (s390x part)
Merged patches (8.2.0):
- 4ee284aca9 Add machine types compat bits. (partial)
---
hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++
hw/s390x/s390-virtio-ccw.c | 159 +++++++++++++++++++++++++++++++
target/s390x/cpu_models.c | 11 +++
target/s390x/cpu_models.h | 2 +
target/s390x/cpu_models_sysemu.c | 2 +
4 files changed, 158 insertions(+)
4 files changed, 174 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 503f212a31..dcd3b966b0 100644
index 7262725d2e..984891b82a 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -826,6 +826,7 @@ bool css_migration_enabled(void)
@@ -855,6 +855,7 @@ bool css_migration_enabled(void)
} \
type_init(ccw_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void ccw_machine_8_0_instance_options(MachineState *machine)
static void ccw_machine_8_2_instance_options(MachineState *machine)
{
}
@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
@@ -1256,6 +1257,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_CCW_MACHINE(2_4, "2.4", false);
+#endif
+
+
+static void ccw_machine_rhel940_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel940_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true);
+
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
+{
+ ccw_machine_rhel940_instance_options(machine);
+}
+
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel940_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
+ mc->smp_props.drawers_supported = false; /* from ccw_machine_8_1 */
+ mc->smp_props.books_supported = false; /* from ccw_machine_8_1 */
+}
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", false);
+
+static void ccw_machine_rhel900_instance_options(MachineState *machine)
+{
@ -204,7 +226,7 @@ index 503f212a31..dcd3b966b0 100644
static void ccw_machine_register_types(void)
{
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 457b5cb10c..ff6b9463cb 100644
index a63d990e4e..198b81f2c0 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -46,6 +46,9 @@
@ -217,7 +239,7 @@ index 457b5cb10c..ff6b9463cb 100644
static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"),
CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"),
@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
@@ -856,22 +859,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data)
{
S390CPUClass *xcc = S390_CPU_CLASS(oc);
@ -249,16 +271,16 @@ index 457b5cb10c..ff6b9463cb 100644
static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data)
diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h
index fb1adc8b21..d76745afa9 100644
index d7b8912989..1a806a97c4 100644
--- a/target/s390x/cpu_models.h
+++ b/target/s390x/cpu_models.h
@@ -38,6 +38,8 @@ struct S390CPUDef {
@@ -38,6 +38,8 @@ typedef struct S390CPUDef {
S390FeatBitmap full_feat;
/* used to init full_feat from generated data */
S390FeatInit full_init;
+ /* if deprecated, provides a suggestion */
+ const char *deprecation_note;
};
} S390CPUDef;
/* CPU model based on a CPU definition */
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
@ -282,5 +304,5 @@ index 87a4480c05..28c1b0486c 100644
if (cpu_list_data->model) {
Object *obj;
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001
From b432505cb28bc3b9b0c1849210ac6c63bca3fe37 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:10:31 +0200
Subject: Add x86_64 machine types
@ -13,9 +13,12 @@ Rebase notes (6.1.0):
Rebase notes (7.0.0):
- Reset alias for all machine-types except latest one
Rebase notes (8.0.0-rc1):
Rebase notes (8.0.0):
- remove legacy_no_rng_seed usage (removed upstream)
Rebase notes (8.1.0):
- default_nic_model to default_nic
Merged patches (6.1.0):
- 59c284ad3b x86: Add x86 rhel8.5 machine types
- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default
@ -35,35 +38,44 @@ Merged patches (7.0.0):
- dcc64971bf RHEL: mark old machine types as deprecated (partialy)
- 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk)
- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes)
- 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI
Merged patches (7.2.0 rc0):
Merged patches (7.2.0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
Merged patches (8.0.0):
- f33ca8aed4 x86: rhel 9.2.0 machine type
Merged patches (8.1.0):
- bd5d81d286 Add RHEL 9.2.0 compat structure (x86_64 part)
- c6eaf73add redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU 8.0.0 update
- 6cbf496e5e hw/acpi: Mark acpi blobs as resizable on RHEL pc machines version 7.6 and above
Merged patches (8.2.0):
- 4ee284aca9 Add machine types compat bits. (partial)
- 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0
---
hw/i386/pc.c | 147 +++++++++++++++++++++-
hw/i386/pc_piix.c | 86 ++++++++++++-
hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++-
hw/i386/pc.c | 159 ++++++++++++++++++++-
hw/i386/pc_piix.c | 112 ++++++++++++++-
hw/i386/pc_q35.c | 285 ++++++++++++++++++++++++++++++++++++-
include/hw/boards.h | 2 +
include/hw/i386/pc.h | 27 ++++
target/i386/cpu.c | 21 ++++
include/hw/i386/pc.h | 33 +++++
target/i386/cpu.c | 21 +++
target/i386/kvm/kvm-cpu.c | 1 +
target/i386/kvm/kvm.c | 4 +
tests/qtest/pvpanic-test.c | 5 +-
9 files changed, 538 insertions(+), 7 deletions(-)
9 files changed, 615 insertions(+), 7 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1489abf010..8abb1f872e 100644
index 29b9964733..a1faa9e92c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = {
@@ -323,6 +323,161 @@ GlobalProperty pc_compat_2_0[] = {
};
const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0);
+/* This macro is for changes to properties that are RHEL specific,
+ * different to the current upstream and to be applied to the latest
@ -80,13 +92,25 @@ index 1489abf010..8abb1f872e 100644
+ { TYPE_X86_CPU, "host-phys-bits-limit", "48" },
+ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" },
+ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" },
+ /* bz 1508330 */
+ /* bz 1508330 */
+ { "vfio-pci", "x-no-geforce-quirks", "on" },
+ /* bz 1941397 */
+ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" },
+};
+const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+
+GlobalProperty pc_rhel_9_3_compat[] = {
+ /* pc_rhel_9_3_compat from pc_compat_8_0 */
+ { "virtio-mem", "unplugged-inaccessible", "auto" },
+};
+const size_t pc_rhel_9_3_compat_len = G_N_ELEMENTS(pc_rhel_9_3_compat);
+
+GlobalProperty pc_rhel_9_2_compat[] = {
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
+ { "ICH9-LPC", "noreboot", "true" },
+};
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
+
+GlobalProperty pc_rhel_9_0_compat[] = {
+ /* pc_rhel_9_0_compat from pc_compat_6_2 */
+ { "virtio-mem", "unplugged-inaccessible", "off" },
@ -177,27 +201,27 @@ index 1489abf010..8abb1f872e 100644
+ * machine types irrespective of host.
+ */
+GlobalProperty pc_rhel_7_6_compat[] = {
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { TYPE_X86_CPU, "x-migrate-smi-count", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+};
+const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat);
@ -211,15 +235,15 @@ index 1489abf010..8abb1f872e 100644
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->pvh_enabled = true;
@@ -1826,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->kvmclock_create_always = true;
pcmc->resizable_acpi_blob = true;
assert(!mc->get_hotplug_handler);
+ mc->async_pf_vmexit_disable = false;
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1836,7 +1992,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->block_default_type = IF_IDE;
@ -230,10 +254,10 @@ index 1489abf010..8abb1f872e 100644
mc->wakeup = pc_machine_wakeup;
hc->pre_plug = pc_machine_device_pre_plug_cb;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 14a794081e..3e330fd36f 100644
index 2a9f465619..44038391fb 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -54,6 +54,7 @@
@@ -53,6 +53,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/xen.h"
@ -241,18 +265,18 @@ index 14a794081e..3e330fd36f 100644
#ifdef CONFIG_XEN
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine,
@@ -235,8 +236,8 @@ static void pc_init1(MachineState *machine,
if (pcmc->smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* These values are guest ABI, do not change */
- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
- smbios_set_defaults("QEMU", mc->desc,
- mc->name, pcmc->smbios_legacy_mode,
+ smbios_set_defaults("Red Hat", "KVM",
+ mc->desc, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine,
@@ -453,6 +454,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp)
* hw_compat_*, pc_compat_*, or * pc_*_machine_options().
*/
@ -260,7 +284,7 @@ index 14a794081e..3e330fd36f 100644
static void pc_compat_2_3_fn(MachineState *machine)
{
X86MachineState *x86ms = X86_MACHINE(machine);
@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m)
@@ -970,3 +972,109 @@ static void xenfv_3_1_machine_options(MachineClass *m)
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
xenfv_3_1_machine_options);
#endif
@ -274,8 +298,9 @@ index 14a794081e..3e330fd36f 100644
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ m->family = "pc_piix_Y";
+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+ pcmc->default_nic_model = "e1000";
+ pcmc->pci_root_uid = 0;
+ pcmc->resizable_acpi_blob = true;
+ m->default_nic = "e1000";
+ m->default_display = "std";
+ m->no_parallel = 1;
+ m->numa_mem_supported = true;
@ -296,6 +321,7 @@ index 14a794081e..3e330fd36f 100644
+static void pc_machine_rhel760_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ ObjectClass *oc = OBJECT_CLASS(m);
+ pc_machine_rhel7_options(m);
+ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)";
+ m->async_pf_vmexit_disable = true;
@ -309,7 +335,31 @@ index 14a794081e..3e330fd36f 100644
+ pcmc->kvmclock_create_always = false;
+ /* From pc_i440fx_5_1_machine_options() */
+ pcmc->pci_root_uid = 1;
+ /* From pc_i440fx_7_0_machine_options() */
+ pcmc->enforce_amd_1tb_hole = false;
+ /* From pc_i440fx_8_0_machine_options() */
+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
+ /* Introduced in QEMU 8.2 */
+ pcmc->default_south_bridge = TYPE_PIIX3_DEVICE;
+
+ object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption",
+ &PCSouthBridgeOption_lookup,
+ pc_get_south_bridge,
+ pc_set_south_bridge);
+ object_class_property_set_description(oc, "x-south-bridge",
+ "Use a different south bridge than PIIX3");
+
+
+ compat_props_add(m->compat_props, hw_compat_rhel_9_4,
+ hw_compat_rhel_9_4_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_3,
+ hw_compat_rhel_9_3_len);
+ compat_props_add(m->compat_props, pc_rhel_9_3_compat,
+ pc_rhel_9_3_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_1,
+ hw_compat_rhel_9_1_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
@ -345,21 +395,21 @@ index 14a794081e..3e330fd36f 100644
+DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760,
+ pc_machine_rhel760_options);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index dc0ba5f9e7..98601bb76f 100644
index 912cb0c0dc..6387df97c8 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine)
@@ -203,8 +203,8 @@ static void pc_q35_init(MachineState *machine)
if (pcmc->smbios_defaults) {
/* These values are guest ABI, do not change */
- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
- smbios_set_defaults("QEMU", mc->desc,
- mc->name, pcmc->smbios_legacy_mode,
+ smbios_set_defaults("Red Hat", "KVM",
+ mc->desc, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine)
@@ -363,6 +363,7 @@ static void pc_q35_init(MachineState *machine)
DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
@ -367,7 +417,7 @@ index dc0ba5f9e7..98601bb76f 100644
static void pc_q35_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
@@ -699,3 +700,283 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL,
pc_q35_2_4_machine_options);
@ -379,8 +429,8 @@ index dc0ba5f9e7..98601bb76f 100644
+static void pc_q35_machine_rhel_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pcmc->default_nic_model = "e1000e";
+ pcmc->pci_root_uid = 0;
+ m->default_nic = "e1000e";
+ m->family = "pc_q35_Z";
+ m->units_per_default_bus = 1;
+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
@ -396,6 +446,24 @@ index dc0ba5f9e7..98601bb76f 100644
+ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
+}
+
+static void pc_q35_init_rhel940(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel940_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.4.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940,
+ pc_q35_machine_rhel940_options);
+
+
+static void pc_q35_init_rhel920(MachineState *machine)
+{
+ pc_q35_init(machine);
@ -404,10 +472,25 @@ index dc0ba5f9e7..98601bb76f 100644
+static void pc_q35_machine_rhel920_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ pc_q35_machine_rhel940_options(m);
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.2.0";
+
+ /* From pc_q35_8_0_machine_options() */
+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
+
+ compat_props_add(m->compat_props, hw_compat_rhel_9_4,
+ hw_compat_rhel_9_4_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_3,
+ hw_compat_rhel_9_3_len);
+ compat_props_add(m->compat_props, pc_rhel_9_3_compat,
+ pc_rhel_9_3_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
@ -619,10 +702,10 @@ index dc0ba5f9e7..98601bb76f 100644
+DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760,
+ pc_q35_machine_rhel760_options);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index c5a965d27f..5e7446ee40 100644
index 4a21eddbf9..4edfdb0ddb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -268,6 +268,8 @@ struct MachineClass {
@@ -277,6 +277,8 @@ struct MachineClass {
strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
bool auto_enable_numa_with_memdev;
@ -632,16 +715,22 @@ index c5a965d27f..5e7446ee40 100644
bool smbus_no_migration_support;
bool nvdimm_supported;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 908a275736..4376f64a47 100644
index 037942d233..37644ede7e 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len;
@@ -314,6 +314,39 @@ extern const size_t pc_compat_1_4_len;
int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
+extern GlobalProperty pc_rhel_compat[];
+extern const size_t pc_rhel_compat_len;
+
+extern GlobalProperty pc_rhel_9_3_compat[];
+extern const size_t pc_rhel_9_3_compat_len;
+
+extern GlobalProperty pc_rhel_9_2_compat[];
+extern const size_t pc_rhel_9_2_compat_len;
+
+extern GlobalProperty pc_rhel_9_0_compat[];
+extern const size_t pc_rhel_9_0_compat_len;
+
@ -670,10 +759,10 @@ index 908a275736..4376f64a47 100644
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
{ \
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6576287e5b..0ef2bf1b93 100644
index cd16cb893d..93203d9b91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = {
@@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = {
* PT in VMX operation
*/
@ -687,7 +776,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 0xd,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2213,6 +2217,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "phenom",
@ -695,7 +784,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 16,
@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2245,6 +2250,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "core2duo",
@ -703,7 +792,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2287,6 +2293,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm64",
@ -711,7 +800,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 0xd,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2328,6 +2335,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "qemu32",
@ -719,7 +808,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 4,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2342,6 +2350,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm32",
@ -727,7 +816,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 5,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2372,6 +2381,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "coreduo",
@ -735,7 +824,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2405,6 +2415,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "486",
@ -743,7 +832,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 4,
@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2417,6 +2428,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium",
@ -751,7 +840,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 5,
@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2429,6 +2441,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium2",
@ -759,7 +848,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 2,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2441,6 +2454,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium3",
@ -767,7 +856,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 3,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2453,6 +2467,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "athlon",
@ -775,7 +864,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 2,
.vendor = CPUID_VENDOR_AMD,
.family = 6,
@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2468,6 +2483,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "n270",
@ -783,7 +872,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2493,6 +2509,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Conroe",
@ -791,7 +880,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2533,6 +2550,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Penryn",
@ -799,7 +888,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -4394,6 +4412,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G1",
@ -807,7 +896,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -4414,6 +4433,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G2",
@ -815,7 +904,7 @@ index 6576287e5b..0ef2bf1b93 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -4436,6 +4456,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G3",
@ -824,10 +913,10 @@ index 6576287e5b..0ef2bf1b93 100644
.vendor = CPUID_VENDOR_AMD,
.family = 16,
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 7237378a7d..7b8a3d5af0 100644
index 9c791b7b05..b91af5051f 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = {
@@ -138,6 +138,7 @@ static PropValue kvm_default_props[] = {
{ "acpi", "off" },
{ "monitor", "off" },
{ "svm", "off" },
@ -836,10 +925,10 @@ index 7237378a7d..7b8a3d5af0 100644
};
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index de531842f6..8d82304609 100644
index 4ce80555b4..9d41edf01e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -3711,6 +3711,7 @@ static int kvm_get_msrs(X86CPU *cpu)
struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
int ret, i;
uint64_t mtrr_top_bits;
@ -847,7 +936,7 @@ index de531842f6..8d82304609 100644
kvm_msr_buf_reset(cpu);
@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -4065,6 +4066,9 @@ static int kvm_get_msrs(X86CPU *cpu)
break;
case MSR_KVM_ASYNC_PF_EN:
env->async_pf_en_msr = msrs[i].data;
@ -881,5 +970,5 @@ index 78f1cf8186..ac954c9b06 100644
val = qtest_inb(qts, 0x505);
g_assert_cmpuint(val, ==, 3);
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001
From 66a0510405e5142a1f9e38e0770aa0f10aed3e03 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 2 Sep 2020 09:39:41 +0200
Subject: Enable make check
@ -24,43 +24,49 @@ Rebase changes (7.0.0):
- Remove unnecessary changes in iotest 051
- Remove changes in bios-tables-test.c and prom-env-test.c qtests
Rebase changes (7.1.0 rc0):
Rebase changes (7.1.0):
- Disable bcm2835-dma-test (added upstream)
Rebase changes (8.0.0-rc1):
Rebase changes (8.0.0):
- Removed chunks for disabling bios-table-test (protected upstream)
Rebase change (8.0.0-rc2):
- Disable new qemu-iotests execution
- Revert change in tco qtest (blocking test run)
Rebase changes (8.1.0):
- Do not disable device-plug-test for s390x
Rebase changes (8.2.0 rc1):
- Remove unneeded hack in qtest/usb-hcd-xhci-test.c
Merged patches (6.1.0):
- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again
Merged patches (7.1.0 rc0):
Merged patches (7.1.0):
- 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57
Merged patches (8.1.0):
- f468163234 iotests: Use alternative CPU type that is not deprecated in RHEL
---
.distro/qemu-kvm.spec.template | 4 ++--
tests/avocado/replay_kernel.py | 2 +-
tests/avocado/reverse_debugging.py | 2 +-
tests/avocado/tcg_plugins.py | 6 ++---
tests/qemu-iotests/meson.build | 34 ++++++++++++++---------------
tests/qemu-iotests/testenv.py | 3 +++
tests/qtest/fuzz-e1000e-test.c | 2 +-
tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
tests/qtest/intel-hda-test.c | 2 +-
tests/qtest/libqos/meson.build | 2 +-
tests/qtest/lpc-ich9-test.c | 2 +-
tests/qtest/meson.build | 2 --
tests/qtest/tco-test.c | 2 +-
tests/qtest/usb-hcd-xhci-test.c | 4 ++++
tests/qtest/meson.build | 1 -
tests/qtest/virtio-net-failover.c | 1 +
14 files changed, 35 insertions(+), 32 deletions(-)
13 files changed, 33 insertions(+), 30 deletions(-)
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index f13456e1ec..2fee270a42 100644
index c37afa662c..61c95a2198 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -147,7 +147,7 @@ def test_aarch64_virt(self):
@@ -153,7 +153,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -70,10 +76,10 @@ index f13456e1ec..2fee270a42 100644
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index 680c314cfc..71eccb8fb6 100644
index 4cce5a5598..e9248a04a2 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -206,7 +206,7 @@ def test_aarch64_virt(self):
@@ -230,7 +230,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -83,10 +89,10 @@ index 680c314cfc..71eccb8fb6 100644
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py
index 642d2e49e3..93b3afd823 100644
index 15fd87b2c1..f0d9d89c93 100644
--- a/tests/avocado/tcg_plugins.py
+++ b/tests/avocado/tcg_plugins.py
@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self):
@@ -66,7 +66,7 @@ def test_aarch64_virt_insn(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -95,7 +101,7 @@ index 642d2e49e3..93b3afd823 100644
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self):
@@ -96,7 +96,7 @@ def test_aarch64_virt_insn_icount(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -104,7 +110,7 @@ index 642d2e49e3..93b3afd823 100644
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self):
@@ -126,7 +126,7 @@ def test_aarch64_virt_mem_icount(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -114,7 +120,7 @@ index 642d2e49e3..93b3afd823 100644
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build
index 9735071a29..32002335f4 100644
index 53847cb98f..a2abdb650e 100644
--- a/tests/qemu-iotests/meson.build
+++ b/tests/qemu-iotests/meson.build
@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats
@ -156,6 +162,20 @@ index 9735071a29..32002335f4 100644
+# suite: suites)
+# endforeach
endforeach
diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 3ff38f2661..cab9a2bd6c 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str,
if self.qemu_prog.endswith(f'qemu-system-{suffix}'):
self.qemu_options += f' -machine {machine}'
+ if self.qemu_prog.endswith('qemu-system-x86_64'):
+ self.qemu_options += ' -cpu Nehalem'
+
# QEMU_DEFAULT_MACHINE
self.qemu_default_machine = get_default_machine(self.qemu_prog)
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
index 5052883fb6..b5286f4b12 100644
--- a/tests/qtest/fuzz-e1000e-test.c
@ -183,20 +203,20 @@ index e37b48b2cc..88647da054 100644
qtest_outl(s, 0xcf8, 0x80001811);
diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c
index d4a8db6fd6..1a796ec15a 100644
index 663bb6c485..2efc43e3f7 100644
--- a/tests/qtest/intel-hda-test.c
+++ b/tests/qtest/intel-hda-test.c
@@ -38,7 +38,7 @@ static void test_issue542_ich6(void)
@@ -42,7 +42,7 @@ static void test_issue542_ich6(void)
{
QTestState *s;
- s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 "
+ s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 "
AUDIODEV
"-device intel-hda,id=" HDA_ID CODEC_DEVICES);
qtest_outl(s, 0xcf8, 0x80000804);
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index cc209a8de5..42a7c529c9 100644
index 90aae42a22..9bc4e41af0 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -44,7 +44,7 @@ libqos_srcs = files(
@ -206,8 +226,8 @@ index cc209a8de5..42a7c529c9 100644
- 'virtio-iommu.c',
+# 'virtio-iommu.c',
'virtio-gpio.c',
'virtio-scmi.c',
'generic-pcihost.c',
diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c
index 8ac95b89f7..cd2102555c 100644
--- a/tests/qtest/lpc-ich9-test.c
@ -222,10 +242,10 @@ index 8ac95b89f7..cd2102555c 100644
qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 85ea4e8d99..893afc8eeb 100644
index 47dabf91d0..0bdfa3a821 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -94,7 +94,6 @@ qtests_i386 = \
@@ -97,7 +97,6 @@ qtests_i386 = \
'drive_del-test',
'tco-test',
'cpu-plug-test',
@ -233,62 +253,11 @@ index 85ea4e8d99..893afc8eeb 100644
'vmgenid-test',
'migration-test',
'test-x86-cpuid-compat',
@@ -223,7 +222,6 @@ qtests_s390x = \
(config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \
['boot-serial-test',
'drive_del-test',
- 'device-plug-test',
'virtio-ccw-test',
'cpu-plug-test',
'migration-test']
diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c
index 0547d41173..3756ce82d8 100644
--- a/tests/qtest/tco-test.c
+++ b/tests/qtest/tco-test.c
@@ -60,7 +60,7 @@ static void test_init(TestData *d)
QTestState *qs;
qs = qtest_initf("-machine q35 %s %s",
- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "",
+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false",
!d->args ? "" : d->args);
qtest_irq_intercept_in(qs, "ioapic");
diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c
index 10ef9d2a91..3855873050 100644
--- a/tests/qtest/usb-hcd-xhci-test.c
+++ b/tests/qtest/usb-hcd-xhci-test.c
@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void)
usb_test_hotplug(global_qtest, "xhci", "1", NULL);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void test_usb_uas_hotplug(void)
{
QTestState *qts = global_qtest;
@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void)
qtest_qmp_device_del(qts, "scsihd");
qtest_qmp_device_del(qts, "uas");
}
+#endif
static void test_usb_ccid_hotplug(void)
{
@@ -56,7 +58,9 @@ int main(int argc, char **argv)
qtest_add_func("/xhci/pci/init", test_xhci_init);
qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug);
+#if 0 /* Disabled for Red Hat Enterprise Linux */
qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug);
+#endif
qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug);
qtest_start("-device nec-usb-xhci,id=xhci"
diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
index 4a809590bf..1bf3fa641c 100644
index 0d40bc1f2d..4c633c1584 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -25,6 +25,7 @@
@@ -26,6 +26,7 @@
#define PCI_SEL_BASE 0x0010
#define BASE_MACHINE "-M q35 -nodefaults " \
@ -297,5 +266,5 @@ index 4a809590bf..1bf3fa641c 100644
"-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 "
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001
From a27cfa0b407bd806ce389a7c69d0130bcfd35244 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 3 Dec 2013 20:05:13 +0100
Subject: vfio: cap number of devices that can be assigned
@ -26,13 +26,16 @@ Count of slots increased to 509 later so we could increase limit
to 64 as some usecases require more than 32 devices.
Signed-off-by: Bandan Das <bsd@redhat.com>
Rebase changes (231025):
- Update to upstream changes
---
hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++-
hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++-
hw/vfio/pci.h | 1 +
2 files changed, 29 insertions(+), 1 deletion(-)
2 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ec9a854361..a779053be3 100644
index c62c02f7b6..ec98080f28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -48,6 +48,9 @@
@ -45,13 +48,20 @@ index ec9a854361..a779053be3 100644
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ssize_t len;
@@ -3076,14 +3079,37 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
+ VFIODevice *vbasedev_iter;
+ VFIOGroup *group;
char *tmp, *subsys;
Error *err = NULL;
struct stat st;
int groupid;
- int i, ret;
+ int ret, i = 0;
bool is_mdev;
char uuid[UUID_STR_LEN];
char *name;
+ if (device_limit && device_limit != vdev->assigned_device_limit) {
+ error_setg(errp, "Assigned device limit has been redefined. "
@ -77,7 +87,7 @@ index ec9a854361..a779053be3 100644
if (!vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = {
@@ -3501,6 +3527,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
no_geforce_quirks, false),
@ -88,10 +98,10 @@ index ec9a854361..a779053be3 100644
false),
DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 177abcc8fb..45235d38ba 100644
index fba8737ab2..eb74d9de2d 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -140,6 +140,7 @@ struct VFIOPCIDevice {
@@ -142,6 +142,7 @@ struct VFIOPCIDevice {
EventNotifier err_notifier;
EventNotifier req_notifier;
int (*resetfn)(struct VFIOPCIDevice *);
@ -100,5 +110,5 @@ index 177abcc8fb..45235d38ba 100644
uint32_t device_id;
uint32_t sub_vendor_id;
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001
From 424f14d123fe1043518758605d94ed5ba50e52ad Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 4 Dec 2013 18:53:17 +0100
Subject: Add support statement to -help output
@ -17,14 +17,14 @@ as unsupported by Red Hat, and advising users to use libvirt instead.
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
softmmu/vl.c | 9 +++++++++
system/vl.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index ea20b23e4c..ad4173138d 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -834,9 +834,17 @@ static void version(void)
diff --git a/system/vl.c b/system/vl.c
index 2bcd9efb9a..93635ffc5b 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -870,9 +870,17 @@ static void version(void)
QEMU_COPYRIGHT "\n");
}
@ -42,7 +42,7 @@ index ea20b23e4c..ad4173138d 100644
printf("usage: %s [options] [disk_image]\n\n"
"'disk_image' is a raw hard disk image for IDE hard disk 0\n\n",
g_get_prgname());
@@ -862,6 +870,7 @@ static void help(int exitcode)
@@ -898,6 +906,7 @@ static void help(int exitcode)
"\n"
QEMU_HELP_BOTTOM "\n");
@ -51,5 +51,5 @@ index ea20b23e4c..ad4173138d 100644
}
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001
From c683ff4a770b77dbe707413840918a46f67fa825 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 8 Jul 2020 08:35:50 +0200
Subject: Use qemu-kvm in documentation instead of qemu-system-<arch>
@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644
.. |I2C| replace:: I\ :sup:`2`\ C
.. |I2S| replace:: I\ :sup:`2`\ S
diff --git a/qemu-options.hx b/qemu-options.hx
index 59bdf67a2c..52b49f1f6a 100644
index 42fd09e4de..557118cb1f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3296,11 +3296,11 @@ SRST
@@ -3469,11 +3469,11 @@ SRST
::
@ -57,5 +57,5 @@ index 59bdf67a2c..52b49f1f6a 100644
``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
Establish a vhost-vdpa netdev.
--
2.39.1
2.39.3

@ -1,4 +1,4 @@
From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001
From 776bff1be5e98982a9bbc8345ff27274ff5b8c0f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 20 Aug 2021 18:25:12 +0200
Subject: qcow2: Deprecation warning when opening v2 images rw
@ -44,10 +44,10 @@ Rebase notes (6.1.0):
2 files changed, 7 insertions(+)
diff --git a/block/qcow2.c b/block/qcow2.c
index 30fd53fa64..22084730f9 100644
index 13e032bd5e..7968735346 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
@@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOTSUP;
goto fail;
}
@ -61,7 +61,7 @@ index 30fd53fa64..22084730f9 100644
s->qcow_version = header.version;
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 6b32c7fbfa..6ddda2ee64 100644
index 2846c83808..83472953a2 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -83,6 +83,7 @@ _filter_qemu()
@ -73,5 +73,5 @@ index 6b32c7fbfa..6ddda2ee64 100644
}
--
2.39.1
2.39.3

@ -0,0 +1,44 @@
From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Thu, 14 Dec 2023 04:42:01 -0500
Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64
Jira: https://issues.redhat.com/browse/RHEL-17168
Adding new machine type to support enabling new features.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/arm/virt.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c541efee5e..0b17c94ad7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void)
}
type_init(rhel_machine_init);
+static void rhel940_virt_options(MachineClass *mc)
+{
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
+
static void rhel920_virt_options(MachineClass *mc)
{
+ rhel940_virt_options(mc);
+
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+DEFINE_RHEL_MACHINE(9, 2, 0)
static void rhel900_virt_options(MachineClass *mc)
{
--
2.39.3

@ -1,53 +0,0 @@
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
From: Kfir Manor <kfir@daynix.com>
Date: Sun, 22 Jan 2023 17:33:07 +0200
Subject: qga/linux: add usb support to guest-get-fsinfo
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
RH-Bugzilla: 2149191
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: yvugenfi <None>
RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191
Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/
Signed-off-by: Kfir Manor <kfir@daynix.com>
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
Patch-id: 72
Patch-present-in-specfile: True
---
qga/commands-posix.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 079689d79a..97754930c1 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
g_str_equal(driver, "sym53c8xx") ||
g_str_equal(driver, "virtio-pci") ||
g_str_equal(driver, "ahci") ||
- g_str_equal(driver, "nvme"))) {
+ g_str_equal(driver, "nvme") ||
+ g_str_equal(driver, "xhci_hcd") ||
+ g_str_equal(driver, "ehci-pci"))) {
break;
}
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
}
} else if (strcmp(driver, "nvme") == 0) {
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
} else {
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
goto cleanup;
--
2.39.1

@ -1,110 +0,0 @@
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 15 Feb 2023 02:03:17 -0500
Subject: Add RHEL 9.2.0 compat structure
Adding compatibility bits necessary to keep 9.2.0 machine
types same after rebase to 8.0.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Rebase notes (8.0.0 rc4):
- Added migration.x-preempt-pre-7-2 compat)
---
hw/arm/virt.c | 1 +
hw/core/machine.c | 10 ++++++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 3 +++
hw/s390x/s390-virtio-ccw.c | 1 +
include/hw/boards.h | 3 +++
6 files changed, 20 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 1ae1654be5..9be53e9355 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
static void rhel920_virt_options(MachineClass *mc)
{
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5aa567fad3..0e0120b7f2 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
const char *rhel_old_machine_deprecation =
"machine types for previous major releases are deprecated";
+GlobalProperty hw_compat_rhel_9_2[] = {
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "e1000e", "migrate-timadj", "off" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "virtio-mem", "x-early-migration", "false" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "migration", "x-preempt-pre-7-2", "true" },
+};
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
+
/*
* Mostly the same as hw_compat_7_0
*/
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3e330fd36f..90fb6e2e03 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 98601bb76f..8945b69175 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.2.0";
+
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index dcd3b966b0..6a0b93c63d 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
static void ccw_machine_rhel920_class_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 5e7446ee40..5f08bd7550 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_9_2[];
+extern const size_t hw_compat_rhel_9_2_len;
+
extern GlobalProperty hw_compat_rhel_9_1[];
extern const size_t hw_compat_rhel_9_1_len;
--
2.39.1

@ -1,76 +0,0 @@
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 27 Mar 2023 15:14:03 +0200
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
8.0.0 update
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/i386/pc.c | 6 ++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
include/hw/i386/pc.h | 3 +++
4 files changed, 13 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8abb1f872e..f216922cee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
};
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+GlobalProperty pc_rhel_9_2_compat[] = {
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
+ { "ICH9-LPC", "noreboot", "true" },
+};
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
+
GlobalProperty pc_rhel_9_0_compat[] = {
/* pc_rhel_9_0_compat from pc_compat_6_2 */
{ "virtio-mem", "unplugged-inaccessible", "off" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 90fb6e2e03..fc704d783f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 8945b69175..e97655616a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 4376f64a47..d218ad1628 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
extern GlobalProperty pc_rhel_compat[];
extern const size_t pc_rhel_compat_len;
+extern GlobalProperty pc_rhel_9_2_compat[];
+extern const size_t pc_rhel_9_2_compat_len;
+
extern GlobalProperty pc_rhel_9_0_compat[];
extern const size_t pc_rhel_9_0_compat_len;
--
2.39.1

@ -1,83 +0,0 @@
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Mon, 17 Apr 2023 01:24:18 -0400
Subject: Disable unwanted new devices
QEMU 8.0 adds two new device we do not want to support that can't
be disabled using configure switch.
1) ide-cf - virtual CompactFlash card
2) i2c-echo - testing echo device
Use manual disabling of the device by changing code (1) and meson configs (2).
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/ide/qdev.c | 9 +++++++++
hw/misc/meson.build | 3 ++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 1b3b4da01d..454bfa5783 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
ide_dev_initfn(dev, IDE_CD, errp);
}
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static void ide_cf_realize(IDEDevice *dev, Error **errp)
{
ide_dev_initfn(dev, IDE_CFATA, errp);
}
+#endif
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
.class_init = ide_cd_class_init,
};
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static Property ide_cf_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
.instance_size = sizeof(IDEDrive),
.class_init = ide_cf_class_init,
};
+#endif
static void ide_device_class_init(ObjectClass *klass, void *data)
{
@@ -396,7 +402,10 @@ static void ide_register_types(void)
type_register_static(&ide_bus_info);
type_register_static(&ide_hd_info);
type_register_static(&ide_cd_info);
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
type_register_static(&ide_cf_info);
+#endif
type_register_static(&ide_device_type_info);
}
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index a40245ad44..9cc5a61ed7 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
+# Disabled for Red Hat Enterprise Linux
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
--
2.39.1

@ -28,7 +28,7 @@ avocado_qemu tests:
The avocado_qemu tests can be executed by running the following avocado command:
avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/
Avocado needs to be installed separately using either pip or from source as
Avocado is not being packaged for RHEL-8.
Avocado is not being packaged for RHEL.
qemu-iotests:
symlinks to corresponding binaries need to be created for QEMU_PROG,
@ -36,4 +36,4 @@ QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be
executed.
The primary purpose of this package is to make these tests available to be
executed as gating tests for the virt module in the RHEL-8 OSCI environment.
executed as gating tests for the qemu-kvm in the RHEL OSCI environment.

@ -0,0 +1,37 @@
From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 11 Jan 2024 12:26:19 -0500
Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm)
Upstream: RHEL only
Compiles the IOMMUFD object on aarch64 to be able to use
the IOMMUFD VFIO backend.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
index aec1831199..b0191d3c69 100644
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
@@ -39,3 +39,4 @@ CONFIG_PXB=y
CONFIG_VHOST_VSOCK=y
CONFIG_VHOST_USER_VSOCK=y
CONFIG_VHOST_USER_FS=y
+CONFIG_IOMMUFD=y
--
2.39.3

@ -0,0 +1,37 @@
From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 11 Jan 2024 12:33:17 -0500
Subject: [PATCH 067/101] Compile IOMMUFD on s390x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm)
Upstream: RHEL only
Compiles the IOMMUFD object on s390x to be able to use
the IOMMUFD VFIO backend.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
index 69a799adbd..24cf6dbd03 100644
--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y
CONFIG_VHOST_VSOCK=y
CONFIG_VHOST_USER_VSOCK=y
CONFIG_VHOST_USER_FS=y
+CONFIG_IOMMUFD=y
--
2.39.3

@ -0,0 +1,37 @@
From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 11 Jan 2024 12:34:44 -0500
Subject: [PATCH 068/101] Compile IOMMUFD on x86_64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm)
Upstream: RHEL only
Compiles the IOMMUFD object on s390x to be able to use
the IOMMUFD VFIO backend.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
index ce5be73633..ba41108e0c 100644
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -108,3 +108,4 @@ CONFIG_SGX=y
CONFIG_VHOST_VSOCK=y
CONFIG_VHOST_USER_VSOCK=y
CONFIG_VHOST_USER_FS=y
+CONFIG_IOMMUFD=y
--
2.39.3

@ -1,84 +0,0 @@
From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Tue, 18 Apr 2023 11:04:49 +0200
Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests
RH-Author: Igor Mammedov <imammedo@redhat.com>
RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests
RH-Bugzilla: 2087047
RH-Acked-by: Ani Sinha <None>
RH-Acked-by: Julia Suvorova <None>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam)
with Q35 using ACPI PCI hotplug by default, user's request to unplug
device is ignored when it's issued before guest OS has been booted.
And any additional attempt to request device hot-unplug afterwards
results in following error:
"Device XYZ is already in the process of unplug"
arguably it can be considered as a regression introduced by [2],
before which it was possible to issue unplug request multiple
times.
Accept new uplug requests after timeout (1ms). This brings ACPI PCI
hotplug on par with native PCIe unplug behavior [1] and allows user
to repeat unplug requests at propper times.
Set expire timeout to arbitrary 1msec so user won't be able to
flood guest with SCI interrupts by calling device_del in tight loop.
PS:
ACPI spec doesn't mandate what OSPM can do with GPEx.status
bits set before it's booted => it's impl. depended.
Status bits may be retained (I tested with one Windows version)
or cleared (Linux since 2.6 kernel times) during guest's ACPI
subsystem initialization.
Clearing status bits (though not wrong per se) hides the unplug
event from guest, and it's upto user to repeat device_del later
when guest is able to handle unplug requests.
1) 18416c62e3 ("pcie: expire pending delete")
2)
Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del")
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
CC: mst@redhat.com
CC: anisinha@redhat.com
CC: jusual@redhat.com
CC: kraxel@redhat.com
Message-Id: <20230418090449.2155757-1-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ani Sinha <anisinha@redhat.com>
(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d)
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
hw/acpi/pcihp.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index dcfb779a7a..cdd6f775a1 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
* acpi_pcihp_eject_slot() when the operation is completed.
*/
pdev->qdev.pending_deleted_event = true;
+ /* if unplug was requested before OSPM is initialized,
+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively
+ * hides unplug event. And than followup qmp_device_del() calls remain
+ * blocked by above flag permanently.
+ * Unblock qmp_device_del() by setting expire limit, so user can
+ * repeat unplug request later when OSPM has been booted.
+ */
+ pdev->qdev.pending_deleted_expires_ms =
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
+
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
}
--
2.39.1

@ -0,0 +1,60 @@
From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:01 -0500
Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release()
a no-op
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm)
aio_context_acquire()/aio_context_release() has been replaced by
fine-grained locking to protect state shared by multiple threads. The
AioContext lock still plays the role of balancing locking in
AIO_WAIT_WHILE() and many functions in QEMU either require that the
AioContext lock is held or not held for this reason. In other words, the
AioContext lock is purely there for consistency with itself and serves
no real purpose anymore.
Stop actually acquiring/releasing the lock in
aio_context_acquire()/aio_context_release() so that subsequent patches
can remove callers across the codebase incrementally.
I have performed "make check" and qemu-iotests stress tests across
x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
result of eliminating the lock.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231205182011.1976568-5-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
util/async.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/util/async.c b/util/async.c
index 8f90ddc304..04ee83d220 100644
--- a/util/async.c
+++ b/util/async.c
@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx)
void aio_context_acquire(AioContext *ctx)
{
- qemu_rec_mutex_lock(&ctx->lock);
+ /* TODO remove this function */
}
void aio_context_release(AioContext *ctx)
{
- qemu_rec_mutex_unlock(&ctx->lock);
+ /* TODO remove this function */
}
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
--
2.39.3

@ -0,0 +1,102 @@
From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:07 -0500
Subject: [PATCH 090/101] aio: remove
aio_context_acquire()/aio_context_release() API
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm)
Delete these functions because nothing calls these functions anymore.
I introduced these APIs in commit 98563fc3ec44 ("aio: add
aio_context_acquire() and aio_context_release()") in 2014. It's with a
sigh of relief that I delete these APIs almost 10 years later.
Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an
understanding of where the code needed to go in order to remove the
limitations that the original dataplane and the IOThread/AioContext
approach that followed it.
Emanuele Giuseppe Esposito had the splendid determination to convert
large parts of the codebase so that they no longer needed the AioContext
lock. This was a painstaking process, both in the actual code changes
required and the iterations of code review that Emanuele eked out of
Kevin and me over many months.
Kevin Wolf tackled multitudes of graph locking conversions to protect
in-flight I/O from run-time changes to the block graph as well as the
clang Thread Safety Analysis annotations that allow the compiler to
check whether the graph lock is being used correctly.
And me, well, I'm just here to add some pizzazz to the QEMU multi-queue
block layer :). Thank you to everyone who helped with this effort,
including Eric Blake, code reviewer extraordinaire, and others who I've
forgotten to mention.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231205182011.1976568-11-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio.h | 17 -----------------
util/async.c | 10 ----------
2 files changed, 27 deletions(-)
diff --git a/include/block/aio.h b/include/block/aio.h
index f08b358077..af05512a7d 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx);
*/
void aio_context_unref(AioContext *ctx);
-/* Take ownership of the AioContext. If the AioContext will be shared between
- * threads, and a thread does not want to be interrupted, it will have to
- * take ownership around calls to aio_poll(). Otherwise, aio_poll()
- * automatically takes care of calling aio_context_acquire and
- * aio_context_release.
- *
- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
- * thread still has to call those to avoid being interrupted by the guest.
- *
- * Bottom halves, timers and callbacks can be created or removed without
- * acquiring the AioContext.
- */
-void aio_context_acquire(AioContext *ctx);
-
-/* Relinquish ownership of the AioContext. */
-void aio_context_release(AioContext *ctx);
-
/**
* aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
* run only once and as soon as possible.
diff --git a/util/async.c b/util/async.c
index dfd44ef612..460529057c 100644
--- a/util/async.c
+++ b/util/async.c
@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx)
g_source_unref(&ctx->source);
}
-void aio_context_acquire(AioContext *ctx)
-{
- /* TODO remove this function */
-}
-
-void aio_context_release(AioContext *ctx)
-{
- /* TODO remove this function */
-}
-
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
AioContext *qemu_get_current_aio_context(void)
--
2.39.3

@ -0,0 +1,81 @@
From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:06 -0500
Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE()
and AIO_WAIT_WHILE_UNLOCKED()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm)
Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and
AIO_WAIT_WHILE_UNLOCKED() are equivalent.
A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED().
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231205182011.1976568-10-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio-wait.h | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index 5449b6d742..157f105916 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -63,9 +63,6 @@ extern AioWait global_aio_wait;
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
* caller does not hold a lock) are involved in the polling condition.
* @cond: wait while this conditional expression is true
- * @unlock: whether to unlock and then lock again @ctx. This applies
- * only when waiting for another AioContext from the main loop.
- * Otherwise it's ignored.
*
* Wait while a condition is true. Use this to implement synchronous
* operations that require event loop activity.
@@ -78,7 +75,7 @@ extern AioWait global_aio_wait;
* wait on conditions between two IOThreads since that could lead to deadlock,
* go via the main loop instead.
*/
-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \
bool waited_ = false; \
AioWait *wait_ = &global_aio_wait; \
AioContext *ctx_ = (ctx); \
@@ -95,13 +92,7 @@ extern AioWait global_aio_wait;
assert(qemu_get_current_aio_context() == \
qemu_get_aio_context()); \
while ((cond)) { \
- if (unlock && ctx_) { \
- aio_context_release(ctx_); \
- } \
aio_poll(qemu_get_aio_context(), true); \
- if (unlock && ctx_) { \
- aio_context_acquire(ctx_); \
- } \
waited_ = true; \
} \
} \
@@ -109,10 +100,11 @@ extern AioWait global_aio_wait;
waited_; })
#define AIO_WAIT_WHILE(ctx, cond) \
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */
#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
/**
* aio_wait_kick:
--
2.39.3

@ -1,55 +0,0 @@
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 50795ee051a342c681a9b45671c552fbd6274db8
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:13 2023 -0400
apic: disable reentrancy detection for apic-msi
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
as reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/intc/apic.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 20b5a94073..ac3d47d231 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
APIC_SPACE_SIZE);
+ /*
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
+ * safe.
+ */
+ s->io_memory.disable_reentrancy_guard = true;
+
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
local_apics[s->id] = s;
--
2.39.3

@ -1,231 +0,0 @@
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:07 2023 -0400
async: Add an optional reentrancy guard to the BH API
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
when creating new BHes. Then, the async API will toggle the guard
before/after calling the BH call-back. This prevents bh->mmio reentrancy
issues.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
[thuth: Fix "line over 90 characters" checkpatch.pl error]
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
docs/devel/multiple-iothreads.txt | 7 +++++++
include/block/aio.h | 18 ++++++++++++++++--
include/qemu/main-loop.h | 7 +++++--
tests/unit/ptimer-test-stubs.c | 3 ++-
util/async.c | 18 +++++++++++++++++-
util/main-loop.c | 6 ++++--
util/trace-events | 1 +
7 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index 343120f2ef..a3e949f6b3 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
* LEGACY timer_new_ms() - create a timer
* LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
* LEGACY qemu_aio_wait() - run an event loop iteration
Since they implicitly work on the main loop they cannot be used in code that
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
* aio_set_event_notifier() - monitor an event notifier
* aio_timer_new() - create a timer
* aio_bh_new() - create a BH
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
* aio_poll() - run an event loop iteration
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
+argument, which is used to check for and prevent re-entrancy problems. For
+BHs associated with devices, the reentrancy-guard is contained in the
+corresponding DeviceState and named "mem_reentrancy_guard".
+
The AioContext can be obtained from the IOThread using
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
Code that takes an AioContext argument works both in IOThreads or the main
diff --git a/include/block/aio.h b/include/block/aio.h
index 543717f294..db6f23c619 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -23,6 +23,8 @@
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
+#include "hw/qdev-core.h"
+
typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* is opaque and must be allocated prior to its use.
*
* @name: A human-readable identifier for debugging purposes.
+ * @reentrancy_guard: A guard set when entering a cb to prevent
+ * device-reentrancy issues
*/
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name);
+ const char *name, MemReentrancyGuard *reentrancy_guard);
/**
* aio_bh_new: Allocate a new bottom half structure
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* string.
*/
#define aio_bh_new(ctx, cb, opaque) \
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
+
+/**
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
+ * reentrancy_guard
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
+ */
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
/**
* aio_notify: Force processing of pending events.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index b3e54e00bc..68e70e61aa 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
/* internal interfaces */
+#define qemu_bh_new_guarded(cb, opaque, guard) \
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
#define qemu_bh_new(cb, opaque) \
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard);
void qemu_bh_schedule_idle(QEMUBH *bh);
enum {
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index f2bfcede93..8c9407c560 100644
--- a/tests/unit/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh = g_new(QEMUBH, 1);
diff --git a/util/async.c b/util/async.c
index 21016a1ac7..a9b528c370 100644
--- a/util/async.c
+++ b/util/async.c
@@ -65,6 +65,7 @@ struct QEMUBH {
void *opaque;
QSLIST_ENTRY(QEMUBH) next;
unsigned flags;
+ MemReentrancyGuard *reentrancy_guard;
};
/* Called concurrently from any thread */
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
}
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name)
+ const char *name, MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
.cb = cb,
.opaque = opaque,
.name = name,
+ .reentrancy_guard = reentrancy_guard,
};
return bh;
}
void aio_bh_call(QEMUBH *bh)
{
+ bool last_engaged_in_io = false;
+
+ if (bh->reentrancy_guard) {
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
+ if (bh->reentrancy_guard->engaged_in_io) {
+ trace_reentrant_aio(bh->ctx, bh->name);
+ }
+ bh->reentrancy_guard->engaged_in_io = true;
+ }
+
bh->cb(bh->opaque);
+
+ if (bh->reentrancy_guard) {
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ }
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
diff --git a/util/main-loop.c b/util/main-loop.c
index e180c85145..7022f02ef8 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
/* Functions to operate on the main QEMU AioContext. */
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
+ reentrancy_guard);
}
/*
diff --git a/util/trace-events b/util/trace-events
index 16f78d8fe5..3f7e766683 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
# async.c
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
# thread-pool.c
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
--
2.39.3

@ -1,70 +0,0 @@
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Mon May 1 10:19:56 2023 -0400
async: avoid use-after-free on re-entrancy guard
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
Fix that by keeping a local copy of the re-entrancy guard pointer.
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
util/async.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/util/async.c b/util/async.c
index a9b528c370..cd1a1815f9 100644
--- a/util/async.c
+++ b/util/async.c
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
{
bool last_engaged_in_io = false;
- if (bh->reentrancy_guard) {
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
- if (bh->reentrancy_guard->engaged_in_io) {
+ /* Make a copy of the guard-pointer as cb may free the bh */
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
+ if (reentrancy_guard) {
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
+ if (reentrancy_guard->engaged_in_io) {
trace_reentrant_aio(bh->ctx, bh->name);
}
- bh->reentrancy_guard->engaged_in_io = true;
+ reentrancy_guard->engaged_in_io = true;
}
bh->cb(bh->opaque);
- if (bh->reentrancy_guard) {
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ if (reentrancy_guard) {
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
}
}
--
2.39.3

@ -0,0 +1,476 @@
From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 21 Nov 2023 16:44:00 +0800
Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm)
Introduce an iommufd object which allows the interaction
with the host /dev/iommu device.
The /dev/iommu can have been already pre-opened outside of qemu,
in which case the fd can be passed directly along with the
iommufd object:
This allows the iommufd object to be shared accross several
subsystems (VFIO, VDPA, ...). For example, libvirt would open
the /dev/iommu once.
If no fd is passed along with the iommufd object, the /dev/iommu
is opened by the qemu code.
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
MAINTAINERS | 8 ++
backends/Kconfig | 4 +
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
backends/meson.build | 1 +
backends/trace-events | 10 ++
include/sysemu/iommufd.h | 38 ++++++
qapi/qom.json | 19 +++
qemu-options.hx | 12 ++
8 files changed, 337 insertions(+)
create mode 100644 backends/iommufd.c
create mode 100644 include/sysemu/iommufd.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 695e0bd34f..a5a446914a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
F: docs/system/s390x/vfio-ap.rst
L: qemu-s390x@nongnu.org
+iommufd
+M: Yi Liu <yi.l.liu@intel.com>
+M: Eric Auger <eric.auger@redhat.com>
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
+S: Supported
+F: backends/iommufd.c
+F: include/sysemu/iommufd.h
+
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
diff --git a/backends/Kconfig b/backends/Kconfig
index f35abc1609..2cb23f62fa 100644
--- a/backends/Kconfig
+++ b/backends/Kconfig
@@ -1 +1,5 @@
source tpm/Kconfig
+
+config IOMMUFD
+ bool
+ depends on VFIO
diff --git a/backends/iommufd.c b/backends/iommufd.c
new file mode 100644
index 0000000000..ba58a0eb0d
--- /dev/null
+++ b/backends/iommufd.c
@@ -0,0 +1,245 @@
+/*
+ * iommufd container backend
+ *
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/iommufd.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/module.h"
+#include "qom/object_interfaces.h"
+#include "qemu/error-report.h"
+#include "monitor/monitor.h"
+#include "trace.h"
+#include <sys/ioctl.h>
+#include <linux/iommufd.h>
+
+static void iommufd_backend_init(Object *obj)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+
+ be->fd = -1;
+ be->users = 0;
+ be->owned = true;
+ qemu_mutex_init(&be->lock);
+}
+
+static void iommufd_backend_finalize(Object *obj)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+
+ if (be->owned) {
+ close(be->fd);
+ be->fd = -1;
+ }
+}
+
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+ int fd = -1;
+
+ fd = monitor_fd_param(monitor_cur(), str, errp);
+ if (fd == -1) {
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
+ return;
+ }
+ qemu_mutex_lock(&be->lock);
+ be->fd = fd;
+ be->owned = false;
+ qemu_mutex_unlock(&be->lock);
+ trace_iommu_backend_set_fd(be->fd);
+}
+
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
+
+ return !be->users;
+}
+
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
+
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
+}
+
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
+{
+ int fd, ret = 0;
+
+ qemu_mutex_lock(&be->lock);
+ if (be->users == UINT32_MAX) {
+ error_setg(errp, "too many connections");
+ ret = -E2BIG;
+ goto out;
+ }
+ if (be->owned && !be->users) {
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
+ ret = fd;
+ goto out;
+ }
+ be->fd = fd;
+ }
+ be->users++;
+out:
+ trace_iommufd_backend_connect(be->fd, be->owned,
+ be->users, ret);
+ qemu_mutex_unlock(&be->lock);
+ return ret;
+}
+
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
+{
+ qemu_mutex_lock(&be->lock);
+ if (!be->users) {
+ goto out;
+ }
+ be->users--;
+ if (!be->users && be->owned) {
+ close(be->fd);
+ be->fd = -1;
+ }
+out:
+ trace_iommufd_backend_disconnect(be->fd, be->users);
+ qemu_mutex_unlock(&be->lock);
+}
+
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_alloc alloc_data = {
+ .size = sizeof(alloc_data),
+ .flags = 0,
+ };
+
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
+ if (ret) {
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
+ return ret;
+ }
+
+ *ioas_id = alloc_data.out_ioas_id;
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
+
+ return ret;
+}
+
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
+{
+ int ret, fd = be->fd;
+ struct iommu_destroy des = {
+ .size = sizeof(des),
+ .id = id,
+ };
+
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
+ trace_iommufd_backend_free_id(fd, id, ret);
+ if (ret) {
+ error_report("Failed to free id: %u %m", id);
+ }
+}
+
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_map map = {
+ .size = sizeof(map),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .ioas_id = ioas_id,
+ .__reserved = 0,
+ .user_va = (uintptr_t)vaddr,
+ .iova = iova,
+ .length = size,
+ };
+
+ if (!readonly) {
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
+ }
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
+ vaddr, readonly, ret);
+ if (ret) {
+ ret = -errno;
+
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
+ if (errno == EFAULT) {
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
+ } else {
+ error_report("IOMMU_IOAS_MAP failed: %m");
+ }
+ }
+ return ret;
+}
+
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_unmap unmap = {
+ .size = sizeof(unmap),
+ .ioas_id = ioas_id,
+ .iova = iova,
+ .length = size,
+ };
+
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
+ /*
+ * IOMMUFD takes mapping as some kind of object, unmapping
+ * nonexistent mapping is treated as deleting a nonexistent
+ * object and return ENOENT. This is different from legacy
+ * backend which allows it. vIOMMU may trigger a lot of
+ * redundant unmapping, to avoid flush the log, treat them
+ * as succeess for IOMMUFD just like legacy backend.
+ */
+ if (ret && errno == ENOENT) {
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
+ ret = 0;
+ } else {
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
+ }
+
+ if (ret) {
+ ret = -errno;
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
+ }
+ return ret;
+}
+
+static const TypeInfo iommufd_backend_info = {
+ .name = TYPE_IOMMUFD_BACKEND,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(IOMMUFDBackend),
+ .instance_init = iommufd_backend_init,
+ .instance_finalize = iommufd_backend_finalize,
+ .class_size = sizeof(IOMMUFDBackendClass),
+ .class_init = iommufd_backend_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&iommufd_backend_info);
+}
+
+type_init(register_types);
diff --git a/backends/meson.build b/backends/meson.build
index 914c7c4afb..9a5cea480d 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -20,6 +20,7 @@ if have_vhost_user
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
endif
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
if have_vhost_user_crypto
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
endif
diff --git a/backends/trace-events b/backends/trace-events
index 652eb76a57..d45c6e31a6 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
dbus_vmstate_post_load(int version_id) "version_id: %d"
dbus_vmstate_loading(const char *id) "id: %s"
dbus_vmstate_saving(const char *id) "id: %s"
+
+# iommufd.c
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
new file mode 100644
index 0000000000..9c5524b0ed
--- /dev/null
+++ b/include/sysemu/iommufd.h
@@ -0,0 +1,38 @@
+#ifndef SYSEMU_IOMMUFD_H
+#define SYSEMU_IOMMUFD_H
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+#include "exec/hwaddr.h"
+#include "exec/cpu-common.h"
+
+#define TYPE_IOMMUFD_BACKEND "iommufd"
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
+
+struct IOMMUFDBackendClass {
+ ObjectClass parent_class;
+};
+
+struct IOMMUFDBackend {
+ Object parent;
+
+ /*< protected >*/
+ int fd; /* /dev/iommu file descriptor */
+ bool owned; /* is the /dev/iommu opened internally */
+ QemuMutex lock;
+ uint32_t users;
+
+ /*< public >*/
+};
+
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
+
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
+ Error **errp);
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly);
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size);
+#endif
diff --git a/qapi/qom.json b/qapi/qom.json
index c53ef978ff..95516ba325 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -794,6 +794,23 @@
{ 'struct': 'VfioUserServerProperties',
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
+##
+# @IOMMUFDProperties:
+#
+# Properties for iommufd objects.
+#
+# @fd: file descriptor name previously passed via 'getfd' command,
+# which represents a pre-opened /dev/iommu. This allows the
+# iommufd object to be shared accross several subsystems
+# (VFIO, VDPA, ...), and the file descriptor to be shared
+# with other process, e.g. DPDK. (default: QEMU opens
+# /dev/iommu by itself)
+#
+# Since: 9.0
+##
+{ 'struct': 'IOMMUFDProperties',
+ 'data': { '*fd': 'str' } }
+
##
# @RngProperties:
#
@@ -934,6 +951,7 @@
'input-barrier',
{ 'name': 'input-linux',
'if': 'CONFIG_LINUX' },
+ 'iommufd',
'iothread',
'main-loop',
{ 'name': 'memory-backend-epc',
@@ -1003,6 +1021,7 @@
'input-barrier': 'InputBarrierProperties',
'input-linux': { 'type': 'InputLinuxProperties',
'if': 'CONFIG_LINUX' },
+ 'iommufd': 'IOMMUFDProperties',
'iothread': 'IothreadProperties',
'main-loop': 'MainLoopProperties',
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
diff --git a/qemu-options.hx b/qemu-options.hx
index 557118cb1f..0814f43066 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5224,6 +5224,18 @@ SRST
The ``share`` boolean option is on by default with memfd.
+ ``-object iommufd,id=id[,fd=fd]``
+ Creates an iommufd backend which allows control of DMA mapping
+ through the ``/dev/iommu`` device.
+
+ The ``id`` parameter is a unique ID which frontends (such as
+ vfio-pci of vdpa) will use to connect with the iommufd backend.
+
+ The ``fd`` parameter is an optional pre-opened file descriptor
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
+ across all subsystems, bringing the benefit of centralized
+ reference counting.
+
``-object rng-builtin,id=id``
Creates a random number generator backend which obtains entropy
from QEMU builtin functions. The ``id`` parameter is a unique ID
--
2.39.3

@ -0,0 +1,47 @@
From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Fri, 22 Dec 2023 08:55:23 +0100
Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend
users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm)
QOM already has a ref count on objects and it will assert much
earlier, when INT_MAX is reached.
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
backends/iommufd.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index ba58a0eb0d..393c0d9a37 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
int fd, ret = 0;
qemu_mutex_lock(&be->lock);
- if (be->users == UINT32_MAX) {
- error_setg(errp, "too many connections");
- ret = -E2BIG;
- goto out;
- }
if (be->owned && !be->users) {
fd = qemu_open_old("/dev/iommu", O_RDWR);
if (fd < 0) {
--
2.39.3

@ -0,0 +1,112 @@
From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Thu, 21 Dec 2023 16:58:41 +0100
Subject: [PATCH 065/101] backends/iommufd: Remove mutex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm)
Coverity reports a concurrent data access violation because be->users
is being accessed in iommufd_backend_can_be_deleted() without holding
the mutex.
However, these routines are called from the QEMU main thread when a
device is created. In this case, the code paths should be protected by
the BQL lock and it should be safe to drop the IOMMUFD backend mutex.
Simply remove it.
Fixes: CID 1531550
Fixes: CID 1531549
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
backends/iommufd.c | 7 -------
include/sysemu/iommufd.h | 2 --
2 files changed, 9 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 393c0d9a37..1ef683c7b0 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj)
be->fd = -1;
be->users = 0;
be->owned = true;
- qemu_mutex_init(&be->lock);
}
static void iommufd_backend_finalize(Object *obj)
@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
error_prepend(errp, "Could not parse remote object fd %s:", str);
return;
}
- qemu_mutex_lock(&be->lock);
be->fd = fd;
be->owned = false;
- qemu_mutex_unlock(&be->lock);
trace_iommu_backend_set_fd(be->fd);
}
@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
int fd, ret = 0;
- qemu_mutex_lock(&be->lock);
if (be->owned && !be->users) {
fd = qemu_open_old("/dev/iommu", O_RDWR);
if (fd < 0) {
@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
out:
trace_iommufd_backend_connect(be->fd, be->owned,
be->users, ret);
- qemu_mutex_unlock(&be->lock);
return ret;
}
void iommufd_backend_disconnect(IOMMUFDBackend *be)
{
- qemu_mutex_lock(&be->lock);
if (!be->users) {
goto out;
}
@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
}
out:
trace_iommufd_backend_disconnect(be->fd, be->users);
- qemu_mutex_unlock(&be->lock);
}
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 9c5524b0ed..9af27ebd6c 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -2,7 +2,6 @@
#define SYSEMU_IOMMUFD_H
#include "qom/object.h"
-#include "qemu/thread.h"
#include "exec/hwaddr.h"
#include "exec/cpu-common.h"
@@ -19,7 +18,6 @@ struct IOMMUFDBackend {
/*< protected >*/
int fd; /* /dev/iommu file descriptor */
bool owned; /* is the /dev/iommu opened internally */
- QemuMutex lock;
uint32_t users;
/*< public >*/
--
2.39.3

@ -1,57 +0,0 @@
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
iomem
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:11 2023 -0400
bcm2835_property: disable reentrancy detection for iomem
As the code is designed for re-entrant calls from bcm2835_property to
bcm2835_mbox and back into bcm2835_property, mark iomem as
reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/misc/bcm2835_property.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
index 890ae7bae5..de056ea2df 100644
--- a/hw/misc/bcm2835_property.c
+++ b/hw/misc/bcm2835_property.c
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
TYPE_BCM2835_PROPERTY, 0x10);
+
+ /*
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
+ * iomem. As such, mark iomem as re-entracy safe.
+ */
+ s->iomem.disable_reentrancy_guard = true;
+
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
}
--
2.39.3

@ -1,354 +0,0 @@
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Tue, 11 Apr 2023 19:34:16 +0200
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2174676
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
When processing vectored guest requests that are not aligned to the
storage request alignment, we pad them by adding head and/or tail
buffers for a read-modify-write cycle.
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
with this padding, the vector can exceed that limit. As of
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
limit, instead returning an error to the guest.
To the guest, this appears as a random I/O error. We should not return
an I/O error to the guest when it issued a perfectly valid request.
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
longer than IOV_MAX, which generally seems to work (because the guest
assumes a smaller alignment than we really have, file-posix's
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
so emulate the request, so that the IOV_MAX does not matter). However,
that does not seem exactly great.
I see two ways to fix this problem:
1. We split such long requests into two requests.
2. We join some elements of the vector into new buffers to make it
shorter.
I am wary of (1), because it seems like it may have unintended side
effects.
(2) on the other hand seems relatively simple to implement, with
hopefully few side effects, so this patch does that.
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
is effectively replaced by the new function bdrv_create_padded_qiov(),
which not only wraps the request IOV with padding head/tail, but also
ensures that the resulting vector will not have more than IOV_MAX
elements. Putting that functionality into qemu_iovec_init_extended() is
infeasible because it requires allocating a bounce buffer; doing so
would require many more parameters (buffer alignment, how to initialize
the buffer, and out parameters like the buffer, its length, and the
original elements), which is not reasonable.
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
functionality into bdrv_create_padded_qiov() by using public
qemu_iovec_* functions, so that is what this patch does.
Because bdrv_pad_request() was the only "serious" user of
qemu_iovec_init_extended(), the next patch will remove the latter
function, so the functionality is not implemented twice.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 151 insertions(+), 15 deletions(-)
diff --git a/block/io.c b/block/io.c
index 2e267a85ab..4e8e90208b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1439,6 +1439,14 @@ out:
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
+ * I/O vector elements so for read requests, the data can be copied back after
+ * the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
size_t head;
size_t tail;
bool merge_reads;
+ bool write;
QEMUIOVector local_qiov;
+
+ uint8_t *collapse_bounce_buf;
+ size_t collapse_len;
+ QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
+ bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
pad->tail_buf = pad->buf + pad->buf_len - align;
}
+ pad->write = write;
+
return true;
}
@@ -1547,8 +1563,23 @@ zero_mem:
return 0;
}
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
{
+ if (pad->collapse_bounce_buf) {
+ if (!pad->write) {
+ /*
+ * If padding required elements in the vector to be collapsed into a
+ * bounce buffer, copy the bounce buffer content back
+ */
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_vfree(pad->collapse_bounce_buf);
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
+ }
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
memset(pad, 0, sizeof(*pad));
}
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+ BdrvRequestPadding *pad,
+ struct iovec *iov, int niov,
+ size_t iov_offset, size_t bytes)
+{
+ int padded_niov, surplus_count, collapse_count;
+
+ /* Assert this invariant */
+ assert(niov <= IOV_MAX);
+
+ /*
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
+ * to the guest is not ideal, but there is little else we can do. At least
+ * this will practically never happen on 64-bit systems.
+ */
+ if (SIZE_MAX - pad->head < bytes ||
+ SIZE_MAX - pad->head - bytes < pad->tail)
+ {
+ return -EINVAL;
+ }
+
+ /* Length of the resulting IOV if we just concatenated everything */
+ padded_niov = !!pad->head + niov + !!pad->tail;
+
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+ if (pad->head) {
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+ }
+
+ /*
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+ * Instead, merge the first two or three elements of @iov to reduce the
+ * number of vector elements as necessary.
+ */
+ if (padded_niov > IOV_MAX) {
+ /*
+ * Only head and tail can have lead to the number of entries exceeding
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
+ * to reduce the number of elements by `surplus_count`, so we merge that
+ * many elements plus one into one element.
+ */
+ surplus_count = padded_niov - IOV_MAX;
+ assert(surplus_count <= !!pad->head + !!pad->tail);
+ collapse_count = surplus_count + 1;
+
+ /*
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+ * advance `iov` (and associated variables) by those elements.
+ */
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+ collapse_count, iov_offset, SIZE_MAX);
+ iov += collapse_count;
+ iov_offset = 0;
+ niov -= collapse_count;
+ bytes -= pad->pre_collapse_qiov.size;
+
+ /*
+ * Construct the bounce buffer to match the length of the to-collapse
+ * vector elements, and for write requests, initialize it with the data
+ * from those elements. Then add it to `pad->local_qiov`.
+ */
+ pad->collapse_len = pad->pre_collapse_qiov.size;
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+ if (pad->write) {
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_iovec_add(&pad->local_qiov,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+ if (pad->tail) {
+ qemu_iovec_add(&pad->local_qiov,
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
+ }
+
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+ return 0;
+}
+
/*
* bdrv_pad_request
*
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
+ * @write is true for write requests, false for read requests.
+ *
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
* - on function start they represent original request
* - on failure or when padding is not needed they are unchanged
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
+ bool write,
BdrvRequestPadding *pad, bool *padded,
BdrvRequestFlags *flags)
{
int ret;
+ struct iovec *sliced_iov;
+ int sliced_niov;
+ size_t sliced_head, sliced_tail;
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
*padded = false;
}
return 0;
}
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail,
- pad->tail);
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+ &sliced_head, &sliced_tail,
+ &sliced_niov);
+
+ /* Guaranteed by bdrv_check_qiov_request() */
+ assert(*bytes <= SIZE_MAX);
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+ sliced_head, *bytes);
if (ret < 0) {
- bdrv_padding_destroy(pad);
+ bdrv_padding_finalize(pad);
return ret;
}
*bytes += pad->head + pad->tail;
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- NULL, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+ &pad, NULL, &flags);
if (ret < 0) {
goto fail;
}
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
fail:
bdrv_dec_in_flight(bs);
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
/* This flag doesn't make sense for padding or zero writes */
flags &= ~BDRV_REQ_REGISTERED_BUF;
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
}
out:
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
return ret;
}
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- &padded, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+ &pad, &padded, &flags);
if (ret < 0) {
return ret;
}
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
out:
tracked_request_end(&req);
--
2.39.3

@ -1,56 +0,0 @@
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 4 May 2023 13:57:34 +0200
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
qmp_block_resize()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
This QMP handler runs in a coroutine, so it must use the corresponding
no_co_wrappers instead.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index d7b5c18f0a..eb509cf964 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
return;
}
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
if (!blk) {
return;
}
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
bdrv_co_lock(bs);
bdrv_drained_end(bs);
- blk_unref(blk);
+ blk_co_unref(blk);
bdrv_co_unlock(bs);
}
--
2.39.1

@ -1,73 +0,0 @@
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 14 Jul 2023 10:59:38 +0200
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2174676
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
which bdrv_check_qiov_request() does not guarantee.
bdrv_check_request32() however will guarantee this, and both of
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
bdrv_co_pwritev_part()) already run it before calling
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
bdrv_check_request32() without expecting error, too.
In effect, this patch will not change guest-visible behavior. It is a
clean-up to tighten a condition to match what is guaranteed by our
callers, and which exists purely to show clearly why the subsequent
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
Note there is a difference between the interfaces of
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
an errp, the latter does not, so we can no longer just pass
&error_abort. Instead, we need to check the returned value. While we
do expect success (because the callers have already run this function),
an assert(ret == 0) is not much simpler than just to return an error if
it occurs, so let us handle errors by returning them up the stack now.
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-id: 20230714085938.202730-1-hreitz@redhat.com
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
("block: Collapse padded I/O vecs exceeding IOV_MAX")
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/block/io.c b/block/io.c
index 4e8e90208b..807c9fb720 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+ /* Should have been checked by the caller already */
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+ if (ret < 0) {
+ return ret;
+ }
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
&sliced_head, &sliced_tail,
&sliced_niov);
- /* Guaranteed by bdrv_check_qiov_request() */
+ /* Guaranteed by bdrv_check_request32() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
--
2.39.3

@ -0,0 +1,104 @@
From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 2 Feb 2024 15:47:54 +0100
Subject: [PATCH 03/22] block-backend: Allow concurrent context changes
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 222: Allow concurrent BlockBackend context changes
RH-Jira: RHEL-24593
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s)
Since AioContext locks have been removed, a BlockBackend's AioContext
may really change at any time (only exception is that it is often
confined to a drained section, as noted in this patch). Therefore,
blk_get_aio_context() cannot rely on its root node's context always
matching that of the BlockBackend.
In practice, whether they match does not matter anymore anyway: Requests
can be sent to BDSs from any context, so anyone who requests the BB's
context should have no reason to require the root node to have the same
context. Therefore, we can and should remove the assertion to that
effect.
In addition, because the context can be set and queried from different
threads concurrently, it has to be accessed with atomic operations.
Buglink: https://issues.redhat.com/browse/RHEL-19381
Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-ID: <20240202144755.671354-2-hreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111)
---
block/block-backend.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index 209eb07528..9c4de79e6b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -44,7 +44,7 @@ struct BlockBackend {
char *name;
int refcnt;
BdrvChild *root;
- AioContext *ctx;
+ AioContext *ctx; /* access with atomic operations only */
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
}
}
+/**
+ * Return BB's current AioContext. Note that this context may change
+ * concurrently at any time, with one exception: If the BB has a root node
+ * attached, its context will only change through bdrv_try_change_aio_context(),
+ * which creates a drained section. Therefore, incrementing such a BB's
+ * in-flight counter will prevent its context from changing.
+ */
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- BlockDriverState *bs;
IO_CODE();
if (!blk) {
return qemu_get_aio_context();
}
- bs = blk_bs(blk);
- if (bs) {
- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
- assert(ctx == blk->ctx);
- }
-
- return blk->ctx;
+ return qatomic_read(&blk->ctx);
}
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
GLOBAL_STATE_CODE();
if (!bs) {
- blk->ctx = new_context;
+ qatomic_set(&blk->ctx, new_context);
return 0;
}
@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
AioContext *new_context = s->new_ctx;
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- blk->ctx = new_context;
+ qatomic_set(&blk->ctx, new_context);
if (tgm->throttle_state) {
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, new_context);
--
2.39.3

@ -1,386 +0,0 @@
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 4 May 2023 13:57:33 +0200
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
context
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
These functions must not be called in coroutine context, because they
need write access to the graph.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 2 +-
block/crypto.c | 6 +++---
block/parallels.c | 6 +++---
block/qcow.c | 6 +++---
block/qcow2.c | 14 +++++++-------
block/qed.c | 6 +++---
block/vdi.c | 6 +++---
block/vhdx.c | 6 +++---
block/vmdk.c | 18 +++++++++---------
block/vpc.c | 6 +++---
include/block/block-global-state.h | 3 ++-
include/sysemu/block-backend-global-state.h | 5 ++++-
12 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/block.c b/block.c
index d79a52ca74..a48112f945 100644
--- a/block.c
+++ b/block.c
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
ret = 0;
out:
- blk_unref(blk);
+ blk_co_unref(blk);
return ret;
}
diff --git a/block/crypto.c b/block/crypto.c
index ca67289187..8fd3ad0054 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
ret = 0;
cleanup:
qcrypto_block_free(crypto);
- blk_unref(blk);
+ blk_co_unref(blk);
return ret;
}
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
ret = 0;
fail:
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
return ret;
}
@@ -730,7 +730,7 @@ fail:
bdrv_co_delete_file_noerr(bs);
}
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_QCryptoBlockCreateOptions(create_opts);
qobject_unref(cryptoopts);
return ret;
diff --git a/block/parallels.c b/block/parallels.c
index 013684801a..b49c35929e 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
ret = 0;
out:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
exit:
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
done:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qcow.c b/block/qcow.c
index 490e4f819e..a0c701f578 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
g_free(tmp);
ret = 0;
exit:
- blk_unref(qcow_blk);
- bdrv_unref(bs);
+ blk_co_unref(qcow_blk);
+ bdrv_co_unref(bs);
qcrypto_block_free(crypto);
return ret;
}
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
fail:
g_free(backing_fmt);
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 22084730f9..0b8beb8b47 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
goto out;
}
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
/*
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
}
}
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
ret = 0;
out:
- blk_unref(blk);
- bdrv_unref(bs);
- bdrv_unref(data_bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
+ bdrv_co_unref(data_bs);
return ret;
}
@@ -3949,8 +3949,8 @@ finish:
}
qobject_unref(qdict);
- bdrv_unref(bs);
- bdrv_unref(data_bs);
+ bdrv_co_unref(bs);
+ bdrv_co_unref(data_bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qed.c b/block/qed.c
index 0705a7b4e2..aff2a2076e 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
ret = 0; /* success */
out:
g_free(l1_table);
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
}
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/vdi.c b/block/vdi.c
index f2434d6153..08331d2dd7 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
ret = 0;
exit:
- blk_unref(blk);
- bdrv_unref(bs_file);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs_file);
g_free(bmap);
return ret;
}
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
done:
qobject_unref(qdict);
qapi_free_BlockdevCreateOptions(create_options);
- bdrv_unref(bs_file);
+ bdrv_co_unref(bs_file);
return ret;
}
diff --git a/block/vhdx.c b/block/vhdx.c
index 81420722a1..00777da91a 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
ret = 0;
delete_and_exit:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
g_free(creator);
return ret;
}
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/vmdk.c b/block/vmdk.c
index f5f49018fe..01ca13c82b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2306,7 +2306,7 @@ exit:
if (pbb) {
*pbb = blk;
} else {
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
}
}
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
blk_bs(backing)->drv->format_name);
- blk_unref(backing);
+ blk_co_unref(backing);
ret = -EINVAL;
goto exit;
}
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
- blk_unref(backing);
+ blk_co_unref(backing);
if (ret) {
error_setg(errp, "Failed to read parent CID");
goto exit;
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
blk_bs(extent_blk)->filename);
created_size += cur_size;
extent_idx++;
- blk_unref(extent_blk);
+ blk_co_unref(extent_blk);
}
/* Check whether we got excess extents */
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
opaque, NULL);
if (extent_blk) {
- blk_unref(extent_blk);
+ blk_co_unref(extent_blk);
error_setg(errp, "List of extents contains unused extents");
ret = -EINVAL;
goto exit;
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
ret = 0;
exit:
if (blk) {
- blk_unref(blk);
+ blk_co_unref(blk);
}
g_free(desc);
g_free(parent_desc_line);
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
errp)) {
goto exit;
}
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
exit:
g_free(ext_filename);
return blk;
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
return NULL;
}
blk_set_allow_write_beyond_eof(blk, true);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
if (size != -1) {
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
if (ret) {
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
}
}
diff --git a/block/vpc.c b/block/vpc.c
index b89b0ff8e2..07ddda5b99 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
}
out:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
}
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 399200a9a3..cd4ea554bf 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
bool quiet, Error **errp);
void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index 2b6d27db7c..fa83f9389c 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
+
+void no_coroutine_fn blk_unref(BlockBackend *blk);
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
+
void blk_remove_all_bs(void);
BlockBackend *blk_by_name(const char *name);
BlockBackend *blk_next(BlockBackend *blk);
--
2.39.1

@ -1,74 +0,0 @@
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Wed, 26 Jul 2023 09:48:07 +0200
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
qemu_open() in blkio_virtio_blk_common_open() is used to open the
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
the future eventually the unix socket.
In all these cases we cannot open the path in read-only mode,
when the `read-only` option of blockdev is on, because the exchange
of IOCTL commands for example will fail.
In order to open the device read-only, we have to use the `read-only`
property of the libblkio driver as we already do in blkio_file_open().
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 3ea9841bd8..5a82c6cb1a 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
* layer through the "/dev/fdset/N" special path.
*/
if (fd_supported) {
- int open_flags;
-
- if (flags & BDRV_O_RDWR) {
- open_flags = O_RDWR;
- } else {
- open_flags = O_RDONLY;
- }
-
- fd = qemu_open(path, open_flags, errp);
+ /*
+ * `path` can contain the path of a character device
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
+ *
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
+ * is not set in the open flags, because the exchange of IOCTL commands
+ * for example will fail.
+ *
+ * In order to open the device read-only, we are using the `read-only`
+ * property of the libblkio driver in blkio_file_open().
+ */
+ fd = qemu_open(path, O_RDWR, errp);
if (fd < 0) {
return -EINVAL;
}
--
2.39.3

@ -1,54 +0,0 @@
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 25 Jul 2023 12:37:44 +0200
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
notifications enabled from the start, but from the next releases
this is no longer the case, so we have to explicitly enable them.
In fact, the libblkio documentation says they could be disabled,
so we should always enable them at the start if we want to be
sure to get completion eventfd notifications:
By default, the driver might not generate completion events for
requests so it is necessary to explicitly enable the completion
file descriptor before use:
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
I discovered this while trying a development version of libblkio:
the guest kernel hangs during boot, while probing the device.
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/block/blkio.c b/block/blkio.c
index afcec359f2..3ea9841bd8 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->bounce_bufs);
s->blkioq = blkio_get_queue(s->blkio, 0);
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
return 0;
--
2.39.3

@ -1,67 +0,0 @@
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:19 +0200
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
setting fails
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
qemu_open() fails if called with an unix domain socket in this way:
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
Since virtio-blk-vhost-user does not support fd passing, let`s always fall back
on using `path` if we fail the fd passing.
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 93a8f8fc5c..eef80e9ce5 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
* In order to open the device read-only, we are using the `read-only`
* property of the libblkio driver in blkio_file_open().
*/
- fd = qemu_open(path, O_RDWR, errp);
+ fd = qemu_open(path, O_RDWR, NULL);
if (fd < 0) {
- return -EINVAL;
+ fd_supported = false;
+ } else {
+ ret = blkio_set_int(s->blkio, "fd", fd);
+ if (ret < 0) {
+ fd_supported = false;
+ qemu_close(fd);
+ }
}
+ }
- ret = blkio_set_int(s->blkio, "fd", fd);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "failed to set fd: %s",
- blkio_get_error_msg());
- qemu_close(fd);
- return ret;
- }
- } else {
+ if (!fd_supported) {
ret = blkio_set_str(s->blkio, "path", path);
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to set path: %s",
--
2.39.3

@ -1,205 +0,0 @@
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 4 Jul 2023 14:34:36 +0200
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
RH-Bugzilla: 2213317
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
When QEMU is built with --enable-modules, the module_block.py script
parses block/*.c to find block drivers that are built as modules. The
script generates a table of block drivers called block_driver_modules[].
This table is used for block driver module loading.
The blkio.c driver uses macros to define its BlockDriver structs. This
was done to avoid code duplication but the module_block.py script is
unable to parse the macro. The result is that libblkio-based block
drivers can be built as modules but will not be found at runtime.
One fix is to make the module_block.py script or build system fancier so
it can parse C macros (e.g. by parsing the preprocessed source code). I
chose not to do this because it raises the complexity of the build,
making future issues harder to debug.
Keep things simple: use the macro to avoid duplicating BlockDriver
function pointers but define .format_name and .protocol_name manually
for each BlockDriver. This way the module_block.py is able to parse the
code.
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
because module_block.py cannot parse them either.
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230704123436.187761-1-stefanha@redhat.com
Cc: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
Conflicts:
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
1 file changed, 61 insertions(+), 57 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 6a6f20f923..afcec359f2 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -21,16 +21,6 @@
#include "block/block-io.h"
-/*
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
- * Using macros instead of typing out the string literals avoids typos.
- */
-#define DRIVER_IO_URING "io_uring"
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
-
/*
* Allocated bounce buffers are kept in a list sorted by buffer address.
*/
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
+ if (strcmp(blkio_driver, "io_uring") == 0) {
ret = blkio_io_uring_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
ret = blkio_nvme_io_uring(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
} else {
g_assert_not_reached();
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
* - truncate
*/
-#define BLKIO_DRIVER(name, ...) \
- { \
- .format_name = name, \
- .protocol_name = name, \
- .instance_size = sizeof(BDRVBlkioState), \
- .bdrv_file_open = blkio_file_open, \
- .bdrv_close = blkio_close, \
- .bdrv_co_getlength = blkio_co_getlength, \
- .bdrv_co_truncate = blkio_truncate, \
- .bdrv_co_get_info = blkio_co_get_info, \
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
- .bdrv_co_preadv = blkio_co_preadv, \
- .bdrv_co_pwritev = blkio_co_pwritev, \
- .bdrv_co_flush_to_disk = blkio_co_flush, \
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
- .bdrv_refresh_limits = blkio_refresh_limits, \
- .bdrv_register_buf = blkio_register_buf, \
- .bdrv_unregister_buf = blkio_unregister_buf, \
- __VA_ARGS__ \
- }
-
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
- DRIVER_IO_URING,
- .bdrv_needs_filename = true,
-);
-
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
- DRIVER_NVME_IO_URING,
-);
-
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VFIO_PCI
-);
+/*
+ * Do not include .format_name and .protocol_name because module_block.py
+ * does not parse macros in the source code.
+ */
+#define BLKIO_DRIVER_COMMON \
+ .instance_size = sizeof(BDRVBlkioState), \
+ .bdrv_file_open = blkio_file_open, \
+ .bdrv_close = blkio_close, \
+ .bdrv_co_getlength = blkio_co_getlength, \
+ .bdrv_co_truncate = blkio_truncate, \
+ .bdrv_co_get_info = blkio_co_get_info, \
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
+ .bdrv_co_preadv = blkio_co_preadv, \
+ .bdrv_co_pwritev = blkio_co_pwritev, \
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
+ .bdrv_refresh_limits = blkio_refresh_limits, \
+ .bdrv_register_buf = blkio_register_buf, \
+ .bdrv_unregister_buf = blkio_unregister_buf,
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VHOST_USER
-);
+/*
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
+ * consistency.
+ */
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VHOST_VDPA
-);
+static BlockDriver bdrv_io_uring = {
+ .format_name = "io_uring",
+ .protocol_name = "io_uring",
+ .bdrv_needs_filename = true,
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_nvme_io_uring = {
+ .format_name = "nvme-io_uring",
+ .protocol_name = "nvme-io_uring",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
+ .format_name = "virtio-blk-vfio-pci",
+ .protocol_name = "virtio-blk-vfio-pci",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_user = {
+ .format_name = "virtio-blk-vhost-user",
+ .protocol_name = "virtio-blk-vhost-user",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
+ .format_name = "virtio-blk-vhost-vdpa",
+ .protocol_name = "virtio-blk-vhost-vdpa",
+ BLKIO_DRIVER_COMMON
+};
static void bdrv_blkio_init(void)
{
--
2.39.3

@ -1,151 +0,0 @@
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:17 +0200
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
functions
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
This is in preparation for the next patch, where for virtio-blk
drivers we need to handle the failure of blkio_connect().
Let's also rename the *_open() functions to *_connect() to make
the code reflect the changes applied.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
1 file changed, 40 insertions(+), 27 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 5a82c6cb1a..85d1eed5fb 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
}
}
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *filename = qdict_get_str(options, "filename");
BDRVBlkioState *s = bs->opaque;
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
return 0;
}
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
return 0;
}
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
- QDict *options, int flags, Error **errp)
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
}
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
qdict_del(options, "path");
return 0;
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
- if (strcmp(blkio_driver, "io_uring") == 0) {
- ret = blkio_io_uring_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else {
- g_assert_not_reached();
- }
- if (ret < 0) {
- blkio_destroy(&s->blkio);
- return ret;
- }
-
if (!(flags & BDRV_O_RDWR)) {
ret = blkio_set_bool(s->blkio, "read-only", true);
if (ret < 0) {
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- ret = blkio_connect(s->blkio);
+ if (strcmp(blkio_driver, "io_uring") == 0) {
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else {
+ g_assert_not_reached();
+ }
if (ret < 0) {
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
- blkio_get_error_msg());
blkio_destroy(&s->blkio);
return ret;
}
--
2.39.3

@ -1,85 +0,0 @@
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:18 +0200
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
`fd`
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
qemu_open() to support fd passing for virtio-blk") we are using
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
for all the virtio-blk-* driver.
Unfortunately that property is also available for those driver that do
not support it, such as virtio-blk-vhost-user.
So, `blkio_get_int()` is not enough to check whether the driver supports
the `fd` property or not. This is because the virito-blk common libblkio
driver only checks whether or not `fd` is set during `blkio_connect()`
and fails with -EINVAL for those transports that do not support it
(all except vhost-vdpa for now).
So let's handle the `blkio_connect()` failure, retrying it using `path`
directly.
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/block/blkio.c b/block/blkio.c
index 85d1eed5fb..93a8f8fc5c 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
}
ret = blkio_connect(s->blkio);
+ /*
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
+ * directly setting `path`.
+ */
+ if (fd_supported && ret == -EINVAL) {
+ qemu_close(fd);
+
+ /*
+ * We need to clear the `fd` property we set previously by setting
+ * it to -1.
+ */
+ ret = blkio_set_int(s->blkio, "fd", -1);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ ret = blkio_set_str(s->blkio, "path", path);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ ret = blkio_connect(s->blkio);
+ }
+
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
blkio_get_error_msg());
--
2.39.3

@ -1,49 +0,0 @@
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:20 +0200
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
support
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
Setting the `fd` property fails with virtio-blk-* libblkio drivers
that do not support fd passing since
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
Getting the `fd` property, on the other hand, always succeeds for
virtio-blk-* libblkio drivers even when they don't support fd passing.
This patch switches to setting the `fd` property because it is a
better mechanism for probing fd passing support than getting the `fd`
property.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blkio.c b/block/blkio.c
index eef80e9ce5..8defbf744f 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
return -EINVAL;
}
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
fd_supported = true;
}
--
2.39.3

@ -1,108 +0,0 @@
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 30 May 2023 09:19:40 +0200
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
virtio-blk
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
RH-Bugzilla: 2180076
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd
passing. Let's expose this to the user, so the management layer
can pass the file descriptor of an already opened path.
If the libblkio virtio-blk driver supports fd passing, let's always
use qemu_open() to open the `path`, so we can handle fd passing
from the management layer through the "/dev/fdset/N" special path.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 0cdc99a729..6a6f20f923 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
- int ret;
+ bool fd_supported = false;
+ int fd, ret;
if (!path) {
error_setg(errp, "missing 'path' option");
return -EINVAL;
}
- ret = blkio_set_str(s->blkio, "path", path);
- qdict_del(options, "path");
- if (ret < 0) {
- error_setg_errno(errp, -ret, "failed to set path: %s",
- blkio_get_error_msg());
- return ret;
- }
-
if (!(flags & BDRV_O_NOCACHE)) {
error_setg(errp, "cache.direct=off is not supported");
return -EINVAL;
}
+
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
+ fd_supported = true;
+ }
+
+ /*
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
+ * to open the `path`, so we can handle fd passing from the management
+ * layer through the "/dev/fdset/N" special path.
+ */
+ if (fd_supported) {
+ int open_flags;
+
+ if (flags & BDRV_O_RDWR) {
+ open_flags = O_RDWR;
+ } else {
+ open_flags = O_RDONLY;
+ }
+
+ fd = qemu_open(path, open_flags, errp);
+ if (fd < 0) {
+ return -EINVAL;
+ }
+
+ ret = blkio_set_int(s->blkio, "fd", fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
+ blkio_get_error_msg());
+ qemu_close(fd);
+ return ret;
+ }
+ } else {
+ ret = blkio_set_str(s->blkio, "path", path);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+ }
+
+ qdict_del(options, "path");
+
return 0;
}
--
2.39.3

@ -1,121 +0,0 @@
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 1 May 2023 13:34:43 -0400
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
default
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
RH-Bugzilla: 2186725
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
reader_count() is a performance bottleneck because the global
aio_context_list_lock mutex causes thread contention. Put this debugging
assertion behind a new ./configure --enable-debug-graph-lock option and
disable it by default.
The --enable-debug-graph-lock option is also enabled by the more general
--enable-debug option.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/graph-lock.c | 3 +++
configure | 1 +
meson.build | 2 ++
meson_options.txt | 2 ++
scripts/meson-buildoptions.sh | 4 ++++
5 files changed, 12 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 454c31e691..259a7a0bde 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
+#endif
}
void assert_bdrv_graph_writable(void)
diff --git a/configure b/configure
index 800b5850f4..a62a3e6be9 100755
--- a/configure
+++ b/configure
@@ -806,6 +806,7 @@ for opt do
--enable-debug)
# Enable debugging options that aren't excessively noisy
debug_tcg="yes"
+ meson_option_parse --enable-debug-graph-lock ""
meson_option_parse --enable-debug-mutex ""
meson_option_add -Doptimization=0
fortify_source="no"
diff --git a/meson.build b/meson.build
index c44d05a13f..d964e741e7 100644
--- a/meson.build
+++ b/meson.build
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
have_coroutine_pool = false
endif
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
summary_info += {'malloc trim support': has_malloc_trim}
summary_info += {'membarrier': have_membarrier}
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
summary_info += {'mutex debugging': get_option('debug_mutex')}
summary_info += {'memory allocator': get_option('malloc')}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..bc857fe68b 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
option('coroutine_pool', type: 'boolean', value: true,
description: 'coroutine freelist (better performance)')
+option('debug_graph_lock', type: 'boolean', value: false,
+ description: 'graph lock debugging support')
option('debug_mutex', type: 'boolean', value: false,
description: 'mutex debugging support')
option('debug_stack_usage', type: 'boolean', value: false,
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..30e1f25259 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -21,6 +21,8 @@ meson_options_help() {
printf "%s\n" ' QEMU'
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
+ printf "%s\n" ' --enable-debug-graph-lock'
+ printf "%s\n" ' graph lock debugging support'
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
printf "%s\n" ' --enable-debug-stack-usage'
printf "%s\n" ' measure coroutine stack usage'
@@ -249,6 +251,8 @@ _meson_option_parse() {
--datadir=*) quote_sh "-Ddatadir=$2" ;;
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
--
2.39.3

@ -0,0 +1,69 @@
From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 12 Sep 2023 19:10:37 -0400
Subject: [PATCH 095/101] block-coroutine-wrapper: use
qemu_get_current_aio_context()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm)
Use qemu_get_current_aio_context() in mixed wrappers and coroutine
wrappers so that code runs in the caller's AioContext instead of moving
to the BlockDriverState's AioContext. This change is necessary for the
multi-queue block layer where any thread can call into the block layer.
Most wrappers are IO_CODE where it's safe to use the current AioContext
nowadays. BlockDrivers and the core block layer use their own locks and
no longer depend on the AioContext lock for thread-safety.
The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current
AioContext is safe because this code is only called with the BQL held
from the main loop thread.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20230912231037.826804-6-stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
scripts/block-coroutine-wrapper.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
index c9c09fcacd..dbbde99e39 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
f"{self.name}")
self.target_name = f'{subsystem}_{subname}'
- self.ctx = self.gen_ctx()
-
self.get_result = 's->ret = '
self.ret = 'return s.ret;'
self.co_ret = 'return '
@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
{func.co_ret}{name}({ func.gen_list('{name}') });
}} else {{
{struct_name} s = {{
- .poll_state.ctx = {func.ctx},
+ .poll_state.ctx = qemu_get_current_aio_context(),
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
{func.return_type} {func.name}({ func.gen_list('{decl}') })
{{
{struct_name} s = {{
- .poll_state.ctx = {func.ctx},
+ .poll_state.ctx = qemu_get_current_aio_context(),
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
--
2.39.3

@ -0,0 +1,217 @@
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 14 Sep 2023 10:00:58 -0400
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
the current thread
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
The file-posix block driver currently only sets up Linux AIO and
io_uring in the BDS's AioContext. In the multi-queue block layer we must
be able to submit I/O requests in AioContexts that do not have Linux AIO
and io_uring set up yet since any thread can call into the block driver.
Set up Linux AIO and io_uring for the current AioContext during request
submission. We lose the ability to return an error from
.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to
resource limits). Instead the user only gets warnings and we fall back
to aio=threads. This is still better than a fatal error after startup.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
1 file changed, 51 insertions(+), 52 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index b862406c71..35684f7e21 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
#ifdef CONFIG_LINUX_AIO
/* Currently Linux does AIO only for files opened with O_DIRECT */
- if (s->use_linux_aio) {
- if (!(s->open_flags & O_DIRECT)) {
- error_setg(errp, "aio=native was specified, but it requires "
- "cache.direct=on, which was not specified.");
- ret = -EINVAL;
- goto fail;
- }
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
- error_prepend(errp, "Unable to use native AIO: ");
- goto fail;
- }
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
+ error_setg(errp, "aio=native was specified, but it requires "
+ "cache.direct=on, which was not specified.");
+ ret = -EINVAL;
+ goto fail;
}
#else
if (s->use_linux_aio) {
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif /* !defined(CONFIG_LINUX_AIO) */
-#ifdef CONFIG_LINUX_IO_URING
- if (s->use_linux_io_uring) {
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
- error_prepend(errp, "Unable to use io_uring: ");
- goto fail;
- }
- }
-#else
+#ifndef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
error_setg(errp, "aio=io_uring was specified, but is not supported "
"in this build.");
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
return true;
}
+#ifdef CONFIG_LINUX_IO_URING
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
+{
+ Error *local_err = NULL;
+ AioContext *ctx;
+
+ if (!s->use_linux_io_uring) {
+ return false;
+ }
+
+ ctx = qemu_get_current_aio_context();
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
+ "falling back to thread pool: ");
+ s->use_linux_io_uring = false;
+ return false;
+ }
+ return true;
+}
+#endif
+
+#ifdef CONFIG_LINUX_AIO
+static inline bool raw_check_linux_aio(BDRVRawState *s)
+{
+ Error *local_err = NULL;
+ AioContext *ctx;
+
+ if (!s->use_linux_aio) {
+ return false;
+ }
+
+ ctx = qemu_get_current_aio_context();
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
+ "falling back to thread pool: ");
+ s->use_linux_aio = false;
+ return false;
+ }
+ return true;
+}
+#endif
+
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
uint64_t bytes, QEMUIOVector *qiov, int type)
{
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_IO_URING
- } else if (s->use_linux_io_uring) {
+ } else if (raw_check_linux_io_uring(s)) {
assert(qiov->size == bytes);
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
goto out;
#endif
#ifdef CONFIG_LINUX_AIO
- } else if (s->use_linux_aio) {
+ } else if (raw_check_linux_aio(s)) {
assert(qiov->size == bytes);
ret = laio_co_submit(s->fd, offset, qiov, type,
s->aio_max_batch);
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
};
#ifdef CONFIG_LINUX_IO_URING
- if (s->use_linux_io_uring) {
+ if (raw_check_linux_io_uring(s)) {
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
}
#endif
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
}
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
-#ifdef CONFIG_LINUX_AIO
- if (s->use_linux_aio) {
- Error *local_err = NULL;
- if (!aio_setup_linux_aio(new_context, &local_err)) {
- error_reportf_err(local_err, "Unable to use native AIO, "
- "falling back to thread pool: ");
- s->use_linux_aio = false;
- }
- }
-#endif
-#ifdef CONFIG_LINUX_IO_URING
- if (s->use_linux_io_uring) {
- Error *local_err = NULL;
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
- error_reportf_err(local_err, "Unable to use linux io_uring, "
- "falling back to thread pool: ");
- s->use_linux_io_uring = false;
- }
- }
-#endif
-}
-
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
.bdrv_co_copy_range_from = raw_co_copy_range_from,
.bdrv_co_copy_range_to = raw_co_copy_range_to,
.bdrv_refresh_limits = raw_refresh_limits,
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_copy_range_from = raw_co_copy_range_from,
.bdrv_co_copy_range_to = raw_co_copy_range_to,
.bdrv_refresh_limits = raw_refresh_limits,
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
.bdrv_refresh_limits = cdrom_refresh_limits,
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
.bdrv_refresh_limits = cdrom_refresh_limits,
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
--
2.39.3

File diff suppressed because it is too large Load Diff

@ -0,0 +1,97 @@
From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:04 -0500
Subject: [PATCH 087/101] block: remove bdrv_co_lock()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm)
The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops.
Remove them.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20231205182011.1976568-8-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 10 ----------
blockdev.c | 5 -----
include/block/block-global-state.h | 14 --------------
3 files changed, 29 deletions(-)
diff --git a/block.c b/block.c
index 91ace5d2d5..434b7f4d72 100644
--- a/block.c
+++ b/block.c
@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
bdrv_dec_in_flight(bs);
}
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
-{
- /* TODO removed in next patch */
-}
-
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
-{
- /* TODO removed in next patch */
-}
-
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
GLOBAL_STATE_CODE();
diff --git a/blockdev.c b/blockdev.c
index 5d8b3a23eb..3a5e7222ec 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
return;
}
- bdrv_co_lock(bs);
bdrv_drained_begin(bs);
- bdrv_co_unlock(bs);
old_ctx = bdrv_co_enter(bs);
blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
bdrv_co_leave(bs, old_ctx);
- bdrv_co_lock(bs);
bdrv_drained_end(bs);
- bdrv_co_unlock(bs);
-
blk_co_unref(blk);
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 0327f1c605..4ec0b217f0 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-/**
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
- * know we already own the lock of the current AioContext.
- *
- * May only be called in the main thread.
- */
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
-
-/**
- * Unlocks the AioContext of @bs if it's not the current AioContext.
- */
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
-
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
GHashTable *visited, Transaction *tran,
Error **errp);
--
2.39.3

@ -0,0 +1,411 @@
From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:11 -0500
Subject: [PATCH 094/101] block: remove outdated AioContext locking comments
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm)
The AioContext lock no longer exists.
There is one noteworthy change:
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
- * requires the caller to be either in the main thread and hold
- * the BlockdriverState (bs) AioContext lock, or directly in the
- * home thread that runs the bs AioContext. Calling them from
- * another thread in another AioContext would cause deadlocks.
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
+ * the caller to be either in the main thread or directly in the home thread
+ * that runs the bs AioContext. Calling them from another thread in another
+ * AioContext would cause deadlocks.
I am not sure whether deadlocks are still possible. Maybe they have just
moved to the fine-grained locks that have replaced the AioContext. Since
I am not sure if the deadlocks are gone, I have kept the substance
unchanged and just removed mention of the AioContext.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231205182011.1976568-15-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 73 ++++++----------------------
block/block-backend.c | 8 ---
block/export/vhost-user-blk-server.c | 4 --
include/block/block-common.h | 3 --
include/block/block-io.h | 9 ++--
include/block/block_int-common.h | 2 -
tests/qemu-iotests/202 | 2 +-
tests/qemu-iotests/203 | 3 +-
8 files changed, 22 insertions(+), 82 deletions(-)
diff --git a/block.c b/block.c
index 434b7f4d72..a097772238 100644
--- a/block.c
+++ b/block.c
@@ -1616,11 +1616,6 @@ out:
g_free(gen_node_name);
}
-/*
- * The caller must always hold @bs AioContext lock, because this function calls
- * bdrv_refresh_total_sectors() which polls when called from non-coroutine
- * context.
- */
static int no_coroutine_fn GRAPH_UNLOCKED
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
QDict *options, int open_flags, Error **errp)
@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
* Replaces the node that a BdrvChild points to without updating permissions.
*
* If @new_bs is non-NULL, the parent of @child must already be drained through
- * @child and the caller must hold the AioContext lock for @new_bs.
+ * @child.
*/
static void GRAPH_WRLOCK
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
*
* Returns new created child.
*
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
- * @child_bs can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
+ * function.
*/
static BdrvChild * GRAPH_WRLOCK
bdrv_attach_child_common(BlockDriverState *child_bs,
@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
/*
* Function doesn't update permissions, caller is responsible for this.
*
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
- * @child_bs can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
+ * function.
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
- *
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
- * (unless @child_bs is already in @ctx).
*/
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
@@ -3226,9 +3216,6 @@ out:
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
- *
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
*/
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
*
* Function doesn't update permissions, caller is responsible for this.
*
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
- * @child_bs can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
+ * function.
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
@@ -3513,9 +3499,8 @@ out:
}
/*
- * The caller must hold the AioContext lock for @backing_hd. Both @bs and
- * @backing_hd can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * Both @bs and @backing_hd can move to a different AioContext in this
+ * function.
*
* If a backing child is already present (i.e. we're detaching a node), that
* child node must be drained.
@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
- * The caller must hold the main AioContext lock.
- *
* TODO Can this be unified with bdrv_open_image()?
*/
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
@@ -3745,9 +3728,7 @@ done:
*
* The BlockdevRef will be removed from the options QDict.
*
- * The caller must hold the lock of the main AioContext and no other AioContext.
- * @parent can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * @parent can move to a different AioContext in this function.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename,
/*
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
*
- * The caller must hold the lock of the main AioContext and no other AioContext.
- * @parent can move to a different AioContext in this function. Callers must
- * make sure that their AioContext locking is still correct after this.
+ * @parent can move to a different AioContext in this function.
*/
int bdrv_open_file_child(const char *filename,
QDict *options, const char *bdref_key,
@@ -3923,8 +3902,6 @@ out:
* The reference parameter may be used to specify an existing block device which
* should be opened. If specified, neither options nor a filename may be given,
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
- *
- * The caller must always hold the main AioContext lock.
*/
static BlockDriverState * no_coroutine_fn
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
@@ -4217,7 +4194,6 @@ close_and_fail:
return NULL;
}
-/* The caller must always hold the main AioContext lock. */
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
*
* Return 0 on success, otherwise return < 0 and set @errp.
*
- * The caller must hold the AioContext lock of @reopen_state->bs.
* @reopen_state->bs can move to a different AioContext in this function.
- * Callers must make sure that their AioContext locking is still correct after
- * this.
*/
static int GRAPH_UNLOCKED
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
@@ -4801,8 +4774,6 @@ out_rdlock:
* It is the responsibility of the caller to then call the abort() or
* commit() for any other BDS that have been left in a prepare() state
*
- * The caller must hold the AioContext lock of @reopen_state->bs.
- *
* After calling this function, the transaction @change_child_tran may only be
* completed while holding a writer lock for the graph.
*/
@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
* child.
*
* This function does not create any image files.
- *
- * The caller must hold the AioContext lock for @bs_top.
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs)
* after the call (even on failure), so if the caller intends to reuse the
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
*
- * The caller holds the AioContext lock for @bs. It must make sure that @bs
- * stays in the same AioContext, i.e. @options must not refer to nodes in a
- * different AioContext.
+ * The caller must make sure that @bs stays in the same AioContext, i.e.
+ * @options must not refer to nodes in a different AioContext.
*/
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = {
*
* Must be called from the main AioContext.
*
- * The caller must own the AioContext lock for the old AioContext of bs, but it
- * must not own the AioContext lock for new_context (unless new_context is the
- * same as the current context of bs).
- *
* @visited will accumulate all visited BdrvChild objects. The caller is
* responsible for freeing the list afterwards.
*/
@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
*
* If ignore_child is not NULL, that child (and its subgraph) will not
* be touched.
- *
- * This function still requires the caller to take the bs current
- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
- * assumes the lock is always held if bs is in another AioContext.
- * For the same reason, it temporarily also holds the new AioContext, since
- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
- * Therefore the new AioContext lock must not be taken by the caller.
*/
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
BdrvChild *ignore_child, Error **errp)
@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
/*
* Linear phase: go through all callbacks collected in the transaction.
- * Run all callbacks collected in the recursion to switch all nodes
- * AioContext lock (transaction commit), or undo all changes done in the
+ * Run all callbacks collected in the recursion to switch every node's
+ * AioContext (transaction commit), or undo all changes done in the
* recursion (transaction abort).
*/
diff --git a/block/block-backend.c b/block/block-backend.c
index f412bed274..209eb07528 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
- *
- * Callers must hold the AioContext lock of @bs.
*/
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp)
@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
*
- * Called without holding an AioContext lock.
- *
* TODO: Remove @filename and @flags; it should be possible to specify a whole
* BDS tree just by specifying the @options QDict (or @reference,
* alternatively). At the time of adding this function, this is not possible,
@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
/*
* Disassociates the currently associated BlockDriverState from @blk.
- *
- * The caller must hold the AioContext lock for the BlockBackend.
*/
void blk_remove_bs(BlockBackend *blk)
{
@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
- *
- * Callers must hold the AioContext lock of @bs.
*/
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index 16f48388d3..50c358e8cd 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque)
vu_config_change_msg(&vexp->vu_server.vu_dev);
}
-/* Called with vexp->export.ctx acquired */
static void vu_blk_drained_begin(void *opaque)
{
VuBlkExport *vexp = opaque;
@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque)
vhost_user_server_detach_aio_context(&vexp->vu_server);
}
-/* Called with vexp->export.blk AioContext acquired */
static void vu_blk_drained_end(void *opaque)
{
VuBlkExport *vexp = opaque;
@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque)
* Ensures that bdrv_drained_begin() waits until in-flight requests complete
* and the server->co_trip coroutine has terminated. It will be restarted in
* vhost_user_server_attach_aio_context().
- *
- * Called with vexp->export.ctx acquired.
*/
static bool vu_blk_drained_poll(void *opaque)
{
diff --git a/include/block/block-common.h b/include/block/block-common.h
index d7599564db..a846023a09 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -70,9 +70,6 @@
* automatically takes the graph rdlock when calling the wrapped function. In
* the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
* graph wrlock.
- *
- * If the first parameter of the function is a BlockDriverState, BdrvChild or
- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
*/
#define no_co_wrapper
#define no_co_wrapper_bdrv_rdlock
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 8eb39a858b..b49e0537dd 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
- * requires the caller to be either in the main thread and hold
- * the BlockdriverState (bs) AioContext lock, or directly in the
- * home thread that runs the bs AioContext. Calling them from
- * another thread in another AioContext would cause deadlocks.
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
+ * the caller to be either in the main thread or directly in the home thread
+ * that runs the bs AioContext. Calling them from another thread in another
+ * AioContext would cause deadlocks.
*
* Therefore, these functions are not proper I/O, because they
* can't run in *any* iothreads, but only in a specific one.
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 4e31d161c5..151279d481 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -1192,8 +1192,6 @@ struct BlockDriverState {
/* The error object in use for blocking operations on backing_hd */
Error *backing_blocker;
- /* Protected by AioContext lock */
-
/*
* If we are reading a disk image, give its size in sectors.
* Generally read-only; it is written to by load_snapshot and
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
index b784dcd791..13304242e5 100755
--- a/tests/qemu-iotests/202
+++ b/tests/qemu-iotests/202
@@ -21,7 +21,7 @@
# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
# single IOThread completes successfully. This particular command triggered a
# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
-# against regressions.
+# against regressions even though the AioContext lock no longer exists.
import iotests
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
index ab80fd0e44..1ba878522b 100755
--- a/tests/qemu-iotests/203
+++ b/tests/qemu-iotests/203
@@ -21,7 +21,8 @@
# Check that QMP 'migrate' with multiple drives on a single IOThread completes
# successfully. This particular command triggered a hang in the source QEMU
# process due to recursive AioContext locking in bdrv_invalidate_all() and
-# BDRV_POLL_WHILE().
+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext
+# lock no longer exists.
import iotests
--
2.39.3

@ -1,55 +0,0 @@
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit ef56ffbdd6b0605dc1e305611287b948c970e236
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:08 2023 -0400
checkpatch: add qemu_bh_new/aio_bh_new checks
Advise authors to use the _guarded versions of the APIs, instead.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
scripts/checkpatch.pl | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index d768171dcf..eeaec436eb 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2865,6 +2865,14 @@ sub process {
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
}
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
+# recommend aio_bh_new_guarded instead of aio_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
# check for module_init(), use category-specific init macros explicitly please
if ($line =~ /^module_init\s*\(/) {
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
--
2.39.3

@ -0,0 +1,75 @@
From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 4 Dec 2023 11:42:59 -0500
Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm)
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
to avoid a race with scsi_device_purge_requests() running in the main
loop thread.
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
while I/O is running in the IOThread AioContext. Therefore it is no
longer necessary to take this lock to protect DMAAIOCB fields. The
->cb() function also does not require the lock because blk_aio_*() and
friends do not need the AioContext lock.
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
rely on it taking the AioContext lock, so this change is safe.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231204164259.1515217-5-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
system/dma-helpers.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index 36211acc7e..528117f256 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
trace_dma_blk_cb(dbs, ret);
- aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- goto out;
+ return;
}
dma_blk_unmap(dbs);
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
trace_dma_map_wait(dbs);
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- goto out;
+ return;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
assert(dbs->acb);
-out:
- aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
--
2.39.3

@ -0,0 +1,228 @@
From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Tue, 21 Nov 2023 16:44:26 +0800
Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm)
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
MAINTAINERS | 1 +
docs/devel/index-internals.rst | 1 +
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
3 files changed, 168 insertions(+)
create mode 100644 docs/devel/vfio-iommufd.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index ca70bb4e64..0ddb20a35f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
F: include/sysemu/iommufd.h
F: include/qemu/chardev_open.h
F: util/chardev_open.c
+F: docs/devel/vfio-iommufd.rst
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
index 6f81df92bc..3def4a138b 100644
--- a/docs/devel/index-internals.rst
+++ b/docs/devel/index-internals.rst
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
s390-dasd-ipl
tracing
vfio-migration
+ vfio-iommufd
writing-monitor-commands
virtio-backends
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
new file mode 100644
index 0000000000..3d1c11f175
--- /dev/null
+++ b/docs/devel/vfio-iommufd.rst
@@ -0,0 +1,166 @@
+===============================
+IOMMUFD BACKEND usage with VFIO
+===============================
+
+(Same meaning for backend/container/BE)
+
+With the introduction of iommufd, the Linux kernel provides a generic
+interface for user space drivers to propagate their DMA mappings to kernel
+for assigned devices. While the legacy kernel interface is group-centric,
+the new iommufd interface is device-centric, relying on device fd and iommufd.
+
+To support both interfaces in the QEMU VFIO device, introduce a base container
+to abstract the common part of VFIO legacy and iommufd container. So that the
+generic VFIO code can use either container.
+
+The base container implements generic functions such as memory_listener and
+address space management whereas the derived container implements callbacks
+specific to either legacy or iommufd. Each container has its own way to setup
+secure context and dma management interface. The below diagram shows how it
+looks like with both containers.
+
+::
+
+ VFIO AddressSpace/Memory
+ +-------+ +----------+ +-----+ +-----+
+ | pci | | platform | | ap | | ccw |
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
+ | | | | | AddressSpace |
+ | | | | +------------+---------+
+ +---V-----------V-----------V--------V----+ /
+ | VFIOAddressSpace | <------------+
+ | | | MemoryListener
+ | VFIOContainerBase list |
+ +-------+----------------------------+----+
+ | |
+ | |
+ +-------V------+ +--------V----------+
+ | iommufd | | vfio legacy |
+ | container | | container |
+ +-------+------+ +--------+----------+
+ | |
+ | /dev/iommu | /dev/vfio/vfio
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
+ Userspace | |
+ ============+============================+===========================
+ Kernel | device fd |
+ +---------------+ | group/container fd
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
+ | ATTACH_IOAS) | | device fd
+ | | |
+ | +-------V------------V-----------------+
+ iommufd | | vfio |
+ (map/unmap | +---------+--------------------+-------+
+ ioas_copy) | | | map/unmap
+ | | |
+ +------V------+ +-----V------+ +------V--------+
+ | iommfd core | | device | | vfio iommu |
+ +-------------+ +------------+ +---------------+
+
+* Secure Context setup
+
+ - iommufd BE: uses device fd and iommufd to setup secure context
+ (bind_iommufd, attach_ioas)
+ - vfio legacy BE: uses group fd and container fd to setup secure context
+ (set_container, set_iommu)
+
+* Device access
+
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
+
+* DMA Mapping flow
+
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
+ 2. VFIO populates DMA map/unmap via the container BEs
+ * iommufd BE: uses iommufd
+ * vfio legacy BE: uses container fd
+
+Example configuration
+=====================
+
+Step 1: configure the host device
+---------------------------------
+
+It's exactly same as the VFIO device with legacy VFIO container.
+
+Step 2: configure QEMU
+----------------------
+
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
+
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
+be linked with an iommufd object. It gets a new optional property
+named iommufd which allows to pass an iommufd object. Take ``vfio-pci``
+device for example:
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
+
+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
+management layer. In such a case the fd is passed, the fd supports a
+string naming the fd or a number, for example:
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0,fd=22
+ -device vfio-pci,iommufd=iommufd0,fd=23
+
+If the ``fd`` property is not passed, the fd is opened by QEMU.
+
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
+is not used and the user gets the behavior based on the legacy VFIO
+container:
+
+.. code-block:: bash
+
+ -device vfio-pci,host=0000:02:00.0
+
+Supported platform
+==================
+
+Supports x86, ARM and s390x currently.
+
+Caveats
+=======
+
+Dirty page sync
+---------------
+
+Dirty page sync with iommufd backend is unsupported yet, live migration is
+disabled by default. But it can be force enabled like below, low efficient
+though.
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
+
+P2P DMA
+-------
+
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
+BAR region yet. Below warning shows for assigned PCI device, it's not a bug.
+
+.. code-block:: none
+
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
+
+FD passing with mdev
+--------------------
+
+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
+If FD passing is used, there is no way to know that and the mdev is treated
+like a real PCI device. There is an error as below if user wants to enable
+RAM discarding for mdev.
+
+.. code-block:: none
+
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
+
+``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend
+devices are always mdev and RAM discarding is force enabled.
--
2.39.3

@ -0,0 +1,98 @@
From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:08 -0500
Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm)
Encourage the use of locking primitives and stop mentioning the
AioContext lock since it is being removed.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231205182011.1976568-12-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
docs/devel/multiple-iothreads.txt | 47 +++++++++++--------------------
1 file changed, 16 insertions(+), 31 deletions(-)
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index a3e949f6b3..4865196bde 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in.
How to synchronize with an IOThread
-----------------------------------
-AioContext is not thread-safe so some rules must be followed when using file
-descriptors, event notifiers, timers, or BHs across threads:
+Variables that can be accessed by multiple threads require some form of
+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc.
-1. AioContext functions can always be called safely. They handle their
-own locking internally.
-
-2. Other threads wishing to access the AioContext must use
-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
-context is acquired no other thread can access it or run event loop iterations
-in this AioContext.
-
-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
-used in the block layer and can lead to hangs.
-
-There is currently no lock ordering rule if a thread needs to acquire multiple
-AioContexts simultaneously. Therefore, it is only safe for code holding the
-QEMU global mutex to acquire other AioContexts.
+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(),
+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger
+activity in an IOThread.
Side note: the best way to schedule a function call across threads is to call
-aio_bh_schedule_oneshot(). No acquire/release or locking is needed.
+aio_bh_schedule_oneshot().
+
+The main loop thread can wait synchronously for a condition using
+AIO_WAIT_WHILE().
AioContext and the block layer
------------------------------
@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using
old APIs that implicitly use the main loop. See the "How to program for
IOThreads" above for information on how to do that.
-If main loop code such as a QMP function wishes to access a BlockDriverState
-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
-that callbacks in the IOThread do not run in parallel.
-
Code running in the monitor typically needs to ensure that past
requests from the guest are completed. When a block device is running
in an IOThread, the IOThread can also process requests from the guest
(via ioeventfd). To achieve both objects, wrap the code between
bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
-section". The functions must be called between aio_context_acquire()
-and aio_context_release(). You can freely release and re-acquire the
-AioContext within a drained section.
-
-Long-running jobs (usually in the form of coroutines) are best scheduled in
-the BlockDriverState's AioContext to avoid the need to acquire/release around
-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
-can be used to get a notification whenever bdrv_try_change_aio_context() moves a
+section".
+
+Long-running jobs (usually in the form of coroutines) are often scheduled in
+the BlockDriverState's AioContext. The functions
+bdrv_add/remove_aio_context_notifier, or alternatively
+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to
+get a notification whenever bdrv_try_change_aio_context() moves a
BlockDriverState to a different AioContext.
--
2.39.3

@ -1,153 +0,0 @@
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 17 May 2023 17:28:32 +0200
Subject: [PATCH 02/21] graph-lock: Disable locking for now
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
RH-Bugzilla: 2186725
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
come from callers that hold an AioContext lock, which is not allowed
during polling. In theory, we could temporarily release the lock, but
callers are inconsistent about whether they hold a lock, and if they do,
some are also confused about which one they hold. While all of this is
fixable, it's not trivial, and the best course of action for 8.0.1 is
probably just disabling the graph locking code temporarily.
We don't currently rely on graph locking yet. It is supposed to replace
the AioContext lock eventually to enable multiqueue support, but as long
as we still have the AioContext lock, it is sufficient without the graph
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
more either and this commit can be reverted. (Of course, it can also be
reverted while the AioContext lock still exists if the callers have been
fixed.)
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 259a7a0bde..2490926c90 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(void)
{
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
+#if 0
assert(!qatomic_read(&has_writer));
/* Make sure that constantly arriving new I/O doesn't cause starvation */
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
} while (reader_count() >= 1);
bdrv_drain_all_end();
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}
--
2.39.3

File diff suppressed because it is too large Load Diff

@ -0,0 +1,94 @@
From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 17 Jan 2024 14:55:53 +0100
Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB
alignment
RH-Author: David Hildenbrand <david@redhat.com>
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
RH-Jira: RHEL-20341
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825
https://issues.redhat.com/browse/RHEL-20341
Let's implement the get_min_alignment() callback for memory devices, and
copy for the device memory region the alignment of the host memory
region. This mimics what virtio-mem does, and allows for re-introducing
proper alignment checks for the memory region size (where we don't care
about additional device requirements) in memory device core.
Message-ID: <20240117135554.787344-2-david@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414)
Signed-off-by: David Hildenbrand <david@redhat.com>
---
hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
index 66f297c1d7..0829c495b0 100644
--- a/hw/hyperv/hv-balloon.c
+++ b/hw/hyperv/hv-balloon.c
@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
balloon->mr = g_new0(MemoryRegion, 1);
memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
memory_region_size(hostmem_mr));
-
- /*
- * The VM can indicate an alignment up to 32 GiB. Memory device core can
- * usually only handle/guarantee 1 GiB alignment. The user will have to
- * specify a larger maxmem eventually.
- *
- * The memory device core will warn the user in case maxmem might have to be
- * increased and will fail plugging the device if there is not sufficient
- * space after alignment.
- *
- * TODO: we could do the alignment ourselves in a slightly bigger region.
- * But this feels better, although the warning might be annoying. Maybe
- * we can optimize that in the future (e.g., with such a device on the
- * cmdline place/size the device memory region differently.
- */
- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
+ balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}
static void hv_balloon_free_mr(HvBalloon *balloon)
@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
return balloon->mr;
}
+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
+{
+ /*
+ * The VM can indicate an alignment up to 32 GiB. Memory device core can
+ * usually only handle/guarantee 1 GiB alignment. The user will have to
+ * specify a larger maxmem eventually.
+ *
+ * The memory device core will warn the user in case maxmem might have to be
+ * increased and will fail plugging the device if there is not sufficient
+ * space after alignment.
+ *
+ * TODO: we could do the alignment ourselves in a slightly bigger region.
+ * But this feels better, although the warning might be annoying. Maybe
+ * we can optimize that in the future (e.g., with such a device on the
+ * cmdline place/size the device memory region differently.
+ */
+ return 32 * GiB;
+}
+
static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
MemoryDeviceInfo *info)
{
@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data)
mdc->get_memory_region = hv_balloon_md_get_memory_region;
mdc->decide_memslots = hv_balloon_decide_memslots;
mdc->get_memslots = hv_balloon_get_memslots;
+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
mdc->fill_device_info = hv_balloon_md_fill_device_info;
}
--
2.39.3

@ -1,40 +0,0 @@
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Tue, 2 May 2023 15:51:53 +0530
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
version 7.6 and above
RH-Author: Ani Sinha <None>
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
RH-Bugzilla: 1934134
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
Please look at QEMU upstream commit
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
This patch adapts the above change so that it applies to RHEL pc machines of
version 7.6 and newer. These are the machine types that are currently supported
in RHEL. Q35 machines are not affected.
Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
hw/i386/pc_piix.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4d5880e249..6c7be628e1 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
pcmc->default_nic_model = "e1000";
pcmc->pci_root_uid = 0;
+ pcmc->resizable_acpi_blob = true;
m->default_display = "std";
m->no_parallel = 1;
m->numa_mem_supported = true;
--
2.39.1

@ -1,101 +0,0 @@
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Wed, 29 Mar 2023 10:27:26 +0530
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
older than version 2.3
RH-Author: Ani Sinha <None>
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
RH-Bugzilla: 1934134
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
i440fx machine versions 2.3 and newer supports dynamic ram
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") .
Currently supported all q35 machine types (versions 2.4 and newer) supports
resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table
size exceeds a pre-defined value does not apply to those machine versions.
Add a check limiting the warning message to only those machines that does not
support expandable ram blocks (that is, i440fx machines with version 2.2
and older).
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
---
hw/i386/acpi-build.c | 6 ++++--
hw/i386/pc.c | 1 +
hw/i386/pc_piix.c | 1 +
include/hw/i386/pc.h | 3 +++
4 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..9bc4d8a981 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
int legacy_table_size =
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
ACPI_BUILD_ALIGN_SIZE);
- if (tables_blob->len > legacy_table_size) {
+ if ((tables_blob->len > legacy_table_size) &&
+ !pcmc->resizable_acpi_blob) {
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
g_array_set_size(tables_blob, legacy_table_size);
} else {
/* Make sure we have a buffer in case we need to resize the tables. */
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
+ !pcmc->resizable_acpi_blob) {
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f216922cee..7db5a2348f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
+ pcmc->resizable_acpi_blob = true;
assert(!mc->get_hotplug_handler);
mc->async_pf_vmexit_disable = false;
mc->get_hotplug_handler = pc_get_hotplug_handler;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index fc704d783f..4d5880e249 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
pcmc->rsdp_in_ram = false;
+ pcmc->resizable_acpi_blob = false;
}
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index d218ad1628..2f514d13d8 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -130,6 +130,9 @@ struct PCMachineClass {
/* create kvmclock device even when KVM PV features are not exposed */
bool kvmclock_create_always;
+
+ /* resizable acpi blob compat */
+ bool resizable_acpi_blob;
};
#define TYPE_PC_MACHINE "generic-pc-machine"
--
2.39.1

@ -0,0 +1,42 @@
From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Tue, 21 Nov 2023 16:44:18 +0800
Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 3ada335a24..660f49db49 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -8,6 +8,7 @@ config ARM_VIRT
imply TPM_TIS_SYSBUS
imply TPM_TIS_I2C
imply NVDIMM
+ imply IOMMUFD
select ARM_GIC
select ACPI
select ARM_SMMUV3
--
2.39.3

@ -1,60 +0,0 @@
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'.
Both of them are required to follow cluster-NUMA-node boundary. To
enable the validation to warn about the irregular configuration where
multiple CPUs in one cluster have been associated with different NUMA
nodes.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/sbsa-ref.c | 2 ++
hw/arm/virt.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 0b93558dde..efb380e7c8 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
}
static const TypeInfo sbsa_ref_info = {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9be53e9355..df6a0231bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->smp_props.clusters_supported = true;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
mc->default_ram_id = "mach-virt.ram";
object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3

@ -1,166 +0,0 @@
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 25 Jul 2023 10:56:51 +0100
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
RH-Bugzilla: 2229133
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
The implementation of the SMMUv3 has multiple places where it reads a
data structure from the guest and directly operates on it without
doing a guest-to-host endianness conversion. Since all SMMU data
structures are little-endian, this means that the SMMU doesn't work
on a big-endian host. In particular, this causes the Avocado test
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
to fail on an s390x host.
Add appropriate byte-swapping on reads and writes of guest in-memory
data structures so that the device works correctly on big-endian
hosts.
As part of this we constrain queue_read() to operate only on Cmd
structs and queue_write() on Evt structs, because in practice these
are the only data structures the two functions are used with, and we
need to know what the data structure is to be able to byte-swap its
parts correctly.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/smmu-common.c | 3 +--
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
2 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index e7f1c1f219..daa02ce798 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
dma_addr_t addr = baseaddr + index * sizeof(*pte);
/* TODO: guarantee 64-bit single-copy atomicity */
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
- MEMTXATTRS_UNSPECIFIED);
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
if (ret != MEMTX_OK) {
info->type = SMMU_PTW_ERR_WALK_EABT;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 270c80b665..cfb56725a6 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
}
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
{
dma_addr_t addr = Q_CONS_ENTRY(q);
+ MemTxResult ret;
+ int i;
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
- MEMTXATTRS_UNSPECIFIED);
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
+ MEMTXATTRS_UNSPECIFIED);
+ if (ret != MEMTX_OK) {
+ return ret;
+ }
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
+ le32_to_cpus(&cmd->word[i]);
+ }
+ return ret;
}
-static MemTxResult queue_write(SMMUQueue *q, void *data)
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
{
dma_addr_t addr = Q_PROD_ENTRY(q);
MemTxResult ret;
+ Evt evt = *evt_in;
+ int i;
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
+ cpu_to_le32s(&evt.word[i]);
+ }
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
MEMTXATTRS_UNSPECIFIED);
if (ret != MEMTX_OK) {
return ret;
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
SMMUEventInfo *event)
{
- int ret;
+ int ret, i;
trace_smmuv3_get_ste(addr);
/* TODO: guarantee 64-bit single-copy atomicity */
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
event->u.f_ste_fetch.addr = addr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+ le32_to_cpus(&buf->word[i]);
+ }
return 0;
}
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
CD *buf, SMMUEventInfo *event)
{
dma_addr_t addr = STE_CTXPTR(ste);
- int ret;
+ int ret, i;
trace_smmuv3_get_cd(addr);
/* TODO: guarantee 64-bit single-copy atomicity */
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
event->u.f_ste_fetch.addr = addr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+ le32_to_cpus(&buf->word[i]);
+ }
return 0;
}
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
return -EINVAL;
}
if (s->features & SMMU_FEATURE_2LVL_STE) {
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
dma_addr_t l1ptr, l2ptr;
STEDesc l1std;
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
event->u.f_ste_fetch.addr = l1ptr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
+ le32_to_cpus(&l1std.word[i]);
+ }
span = L1STD_SPAN(&l1std);
--
2.39.3

@ -0,0 +1,88 @@
From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 9 Jan 2024 11:36:42 +1000
Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory
regions
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions
RH-Jira: RHEL-19738
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4
Upstream: RHEL-only
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352
There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI
ECAM and PCI MMIO. Each of them has a property introduced by upstream
commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory
regions") so that the corresponding high memory region can be disabled.
It's notable that another property ("compact-highmem") introduced by
upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property")
so that the compact high memory region layout during assignment can be
disabled, compatible to the old machine types. However, we don't have
the compatible issue since the compact high memory region layout is
always kept as disabled until RHEL9.2.0 machine type and onwards.
Expose those 3 properties: "highmem-redists", "highmem-ecam" and
"highmem-mmio". The property "compact-highmem" is kept as hidden.
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5cab00b4cd..60f117f0d2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
vms->highmem_compact = value;
}
+#endif /* disabled for RHEL */
static bool virt_get_highmem_redists(Object *obj, Error **errp)
{
@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
vms->highmem_mmio = value;
}
-#endif /* disabled for RHEL */
static bool virt_get_its(Object *obj, Error **errp)
{
@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable using "
"physical address space above 32 bits");
+ object_class_property_add_bool(oc, "highmem-redists",
+ virt_get_highmem_redists,
+ virt_set_highmem_redists);
+ object_class_property_set_description(oc, "highmem-redists",
+ "Set on/off to enable/disable high "
+ "memory region for GICv3 or GICv4 "
+ "redistributor");
+
+ object_class_property_add_bool(oc, "highmem-ecam",
+ virt_get_highmem_ecam,
+ virt_set_highmem_ecam);
+ object_class_property_set_description(oc, "highmem-ecam",
+ "Set on/off to enable/disable high "
+ "memory region for PCI ECAM");
+
+ object_class_property_add_bool(oc, "highmem-mmio",
+ virt_get_highmem_mmio,
+ virt_set_highmem_mmio);
+ object_class_property_set_description(oc, "highmem-mmio",
+ "Set on/off to enable/disable high "
+ "memory region for PCI MMIO");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
--
2.39.3

@ -0,0 +1,132 @@
From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 3 Jan 2024 05:57:38 -0500
Subject: [PATCH] hw/arm/virt: Fix compats
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 209: hw/arm/virt: Fix compats
RH-Jira: RHEL-17168
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm)
arm_rhel_compat is not added for virt-rhel9.4.0 machine causing
the efi-virtio.rom to be looked for when instantiating a virtio-net-pci
device and it won't be found since not shipped on ARM. This is a
regression compared to 9.2.
Actually we do not need any rom file for any virtio-net-pci variant
because edk2 already brings the functionality. So for 9.4 onwards, we
want to set romfiles to "" for all of them.
However at the moment we apply arm_rhel_compat from the latest
rhel*_virt_options(). This is not aligned with the generic compat
usage which sets compats for a given machine type to accomodate for
changes that occured after its advent. Here we are somehow abusing
the compat infra to set general driver options that should apply for
all machines. On top of that this is really error prone and we have
forgotten to add arm_rhel_compat several times in the past.
So let's introduce set_arm_rhel_compat() being called before any
*virt_options in the non abstract machine class. That way the setting
will apply to any machine type without any need to add it in any
future machine types.
For < 9.4 machines we don't really care keeping non void romfiles
for transitional and non transitional devices because anyway this was
not working. So let's keep things simple and apply the new defaults for
all RHEL9 machine types.
Finally, to follow the generic pattern we should set hw_compat_rhel_9_0
in 9.0 machine as it is done on x86 or ccw. This has no consequence on
aarch64 because it only contains x86 stuff but that helps understanding
the consistency.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/virt.c | 43 +++++++++++++++++++++++++++++--------------
1 file changed, 29 insertions(+), 14 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0b17c94ad7..5cab00b4cd 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -111,11 +111,39 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
#endif /* disabled for RHEL */
+/*
+ * This variable is for changes to properties that are RHEL specific,
+ * different to the current upstream and to be applied to the latest
+ * machine type. They may be overriden by older machine compats.
+ *
+ * virtio-net-pci variant romfiles are not needed because edk2 does
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
+ * on rhel/aarch64.
+ */
+GlobalProperty arm_rhel_compat[] = {
+ {"virtio-net-pci", "romfile", "" },
+ {"virtio-net-pci-transitional", "romfile", "" },
+ {"virtio-net-pci-non-transitional", "romfile", "" },
+};
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
+
+/*
+ * This cannot be called from the rhel_virt_class_init() because
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
+ * only is called on virt-rhelm.n.s non abstract class init.
+ */
+static void arm_rhel_compat_set(MachineClass *mc)
+{
+ compat_props_add(mc->compat_props, arm_rhel_compat,
+ arm_rhel_compat_len);
+}
+
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
void *data) \
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
+ arm_rhel_compat_set(mc); \
rhel##m##n##s##_virt_options(mc); \
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
if (latest) { \
@@ -139,19 +167,6 @@
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
-/* This variable is for changes to properties that are RHEL specific,
- * different to the current upstream and to be applied to the latest
- * machine type.
- */
-GlobalProperty arm_rhel_compat[] = {
- {
- .driver = "virtio-net-pci",
- .property = "romfile",
- .value = "",
- },
-};
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
-
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc)
{
rhel940_virt_options(mc);
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc)
rhel920_virt_options(mc);
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
vmc->no_tcg_lpa2 = true;
--
2.39.3

@ -1,41 +0,0 @@
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
RHEL machines
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
Upstream Status: RHEL only
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
A warning message will be printed if the boundary is broken.
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index df6a0231bc..faf68488d5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
mc->smp_props.clusters_supported = true;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
mc->default_ram_id = "mach-virt.ram";
object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3

@ -0,0 +1,41 @@
From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Mon, 12 Feb 2024 10:37:54 +0100
Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
RH-Author: Cornelia Huck <cohuck@redhat.com>
RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
RH-Jira: RHEL-24988
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s)
Jira: https://issues.redhat.com/browse/RHEL-24988
Upstream: RHEL only
We do not plan to support any machine types prior to 9.4.0; leave them
in, but mark as deprecated.
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
hw/arm/virt.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 60f117f0d2..943c563391 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc)
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
+
+ /* RHEL 9.4 is the first supported release */
+ mc->deprecation_reason =
+ "machine types for versions prior to 9.4 are deprecated";
}
DEFINE_RHEL_MACHINE(9, 2, 0)
--
2.39.3

@ -0,0 +1,41 @@
From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Tue, 21 Nov 2023 16:44:20 +0800
Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/i386/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 55850791df..a1846be6f7 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -95,6 +95,7 @@ config Q35
imply E1000E_PCI_EXPRESS
imply VMPORT
imply VMMOUSE
+ imply IOMMUFD
select PC_PCI
select PC_ACPI
select PCI_EXPRESS_Q35
--
2.39.3

@ -1,44 +0,0 @@
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Tue, 25 Jul 2023 15:34:45 -0300
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
<= pc-q35-rhel9.2.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
RH-Bugzilla: 2223691
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
This is a downstream-only patch to that sets off the property
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
live migrations to RHEL9.2 happen successfully.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
type < 8.0")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
hw/core/machine.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5ea52317b9..6f5117669d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
{ "virtio-mem", "x-early-migration", "false" },
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
{ "migration", "x-preempt-pre-7-2", "true" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
};
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
--
2.39.3

@ -1,118 +0,0 @@
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Tue, 2 May 2023 21:27:02 -0300
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
type < 8.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
RH-Bugzilla: 2189423
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
set for machine types < 8.0 will cause migration to fail if the target
QEMU version is < 8.0.0 :
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
qemu-system-x86_64: Failed to load PCIDevice:config
qemu-system-x86_64: Failed to load e1000e:parent_obj
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
qemu-system-x86_64: load of migration failed: Invalid argument
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
with this cmdline:
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
In order to fix this, property x-pcie-err-unc-mask was introduced to
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
default, but is disabled if machine type <= 7.2.
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
hw/core/machine.c | 1 +
hw/pci/pci.c | 2 ++
hw/pci/pcie_aer.c | 11 +++++++----
include/hw/pci/pci.h | 2 ++
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0e0120b7f2..c28702b690 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
{ "e1000e", "migrate-timadj", "off" },
{ "virtio-mem", "x-early-migration", "false" },
{ "migration", "x-preempt-pre-7-2", "true" },
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index def5000e7b..8ad4349e96 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -79,6 +79,8 @@ static Property pci_props[] = {
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
failover_pair_id),
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 103667c368..374d593ead 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
PCI_ERR_UNC_SUPPORTED);
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
- PCI_ERR_UNC_MASK_DEFAULT);
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
- PCI_ERR_UNC_SUPPORTED);
+
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_MASK_DEFAULT);
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_SUPPORTED);
+ }
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
PCI_ERR_UNC_SEVERITY_DEFAULT);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d5a40cd058..6dc6742fc4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -207,6 +207,8 @@ enum {
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
#define QEMU_PCIE_CXL_BITNR 10
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
};
typedef struct PCIINTxRoute {
--
2.39.3

@ -0,0 +1,116 @@
From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Tue, 21 Nov 2023 15:03:55 +0100
Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm)
When the legacy and iommufd backends were introduced, a set of common
vfio-pci routines were exported in pci.c for both backends to use :
vfio_pci_pre_reset
vfio_pci_get_pci_hot_reset_info
vfio_pci_host_match
vfio_pci_post_reset
This introduced a build failure on PPC when --without-default-devices
is use because VFIO is always selected in ppc/Kconfig but VFIO_PCI is
not.
Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the
VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with
CONFIG_VFIO_PCI.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/ppc/Kconfig | 2 +-
hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 56f0475a8e..44263a58c4 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -3,11 +3,11 @@ config PSERIES
imply PCI_DEVICES
imply TEST_DEVICES
imply VIRTIO_VGA
+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
select NVDIMM
select DIMM
select PCI
select SPAPR_VSCSI
- select VFIO if LINUX # needed by spapr_pci_vfio.c
select XICS
select XIVE
select MSI_NONBROKEN
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index d1d07bec46..76b2a3487b 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -26,10 +26,12 @@
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"
+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
/*
* Interfaces for IBM EEH (Enhanced Error Handling)
*/
+#ifdef CONFIG_VFIO_PCI
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
/*
@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
return RTAS_OUT_SUCCESS;
}
+
+#else
+
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
+{
+ return false;
+}
+
+void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+}
+
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
+ unsigned int addr, int option)
+{
+ return RTAS_OUT_NOT_SUPPORTED;
+}
+
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
+{
+ return RTAS_OUT_NOT_SUPPORTED;
+}
+
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
+{
+ return RTAS_OUT_NOT_SUPPORTED;
+}
+
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
+{
+ return RTAS_OUT_NOT_SUPPORTED;
+}
+
+#endif /* CONFIG_VFIO_PCI */
--
2.39.3

@ -1,470 +0,0 @@
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
qemu_bh_new_guarded
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:09 2023 -0400
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
This protects devices from bh->mmio reentrancy issues.
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/9pfs/xen-9p-backend.c | 5 ++++-
hw/block/dataplane/virtio-blk.c | 3 ++-
hw/block/dataplane/xen-block.c | 5 +++--
hw/char/virtio-serial-bus.c | 3 ++-
hw/display/qxl.c | 9 ++++++---
hw/display/virtio-gpu.c | 6 ++++--
hw/ide/ahci.c | 3 ++-
hw/ide/ahci_internal.h | 1 +
hw/ide/core.c | 4 +++-
hw/misc/imx_rngc.c | 6 ++++--
hw/misc/macio/mac_dbdma.c | 2 +-
hw/net/virtio-net.c | 3 ++-
hw/nvme/ctrl.c | 6 ++++--
hw/scsi/mptsas.c | 3 ++-
hw/scsi/scsi-bus.c | 3 ++-
hw/scsi/vmw_pvscsi.c | 3 ++-
hw/usb/dev-uas.c | 3 ++-
hw/usb/hcd-dwc2.c | 3 ++-
hw/usb/hcd-ehci.c | 3 ++-
hw/usb/hcd-uhci.c | 2 +-
hw/usb/host-libusb.c | 6 ++++--
hw/usb/redirect.c | 6 ++++--
hw/usb/xen-usb.c | 3 ++-
hw/virtio/virtio-balloon.c | 5 +++--
hw/virtio/virtio-crypto.c | 3 ++-
25 files changed, 66 insertions(+), 33 deletions(-)
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 74f3a05f88..0e266c552b 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
int num_rings;
Xen9pfsRing *rings;
+ MemReentrancyGuard mem_reentrancy_guard;
} Xen9pfsDev;
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
XEN_FLEX_RING_SIZE(ring_order);
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
+ &xen_9pdev->rings[i],
+ &xen_9pdev->mem_reentrancy_guard);
xen_9pdev->rings[i].out_cons = 0;
xen_9pdev->rings[i].out_size = 0;
xen_9pdev->rings[i].inprogress = false;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index b28d81737e..a6202997ee 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
} else {
s->ctx = qemu_get_aio_context();
}
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
+ &DEVICE(vdev)->mem_reentrancy_guard);
s->batch_notify_vqs = bitmap_new(conf->num_queues);
*dataplane = s;
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 734da42ea7..d8bc39d359 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
} else {
dataplane->ctx = qemu_get_aio_context();
}
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
- dataplane);
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
+ dataplane,
+ &DEVICE(xendev)->mem_reentrancy_guard);
return dataplane;
}
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 7d4601cb5d..dd619f0731 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
return;
}
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
+ &dev->mem_reentrancy_guard);
port->elem = NULL;
}
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 80ce1e9a93..f1c0eb7dfc 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
qxl_reset_state(qxl);
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
+ &DEVICE(qxl)->mem_reentrancy_guard);
}
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 5e15c79b94..66ac9b6cc5 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
g->ctrl_vq = virtio_get_queue(vdev, 0);
g->cursor_vq = virtio_get_queue(vdev, 1);
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
+ &qdev->mem_reentrancy_guard);
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
+ &qdev->mem_reentrancy_guard);
QTAILQ_INIT(&g->reslist);
QTAILQ_INIT(&g->cmdq);
QTAILQ_INIT(&g->fenceq);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 55902e1df7..4e76d6b191 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
ahci_write_fis_d2h(ad);
if (ad->port_regs.cmd_issue && !ad->check_bh) {
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
+ &ad->mem_reentrancy_guard);
qemu_bh_schedule(ad->check_bh);
}
}
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index 303fcd7235..2480455372 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -321,6 +321,7 @@ struct AHCIDevice {
bool init_d2h_sent;
AHCICmdHdr *cur_cmd;
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+ MemReentrancyGuard mem_reentrancy_guard;
};
struct AHCIPCIState {
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 45d14a25e9..de48ff9f86 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
{
IDEState *s = opaque;
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
/* Paired with a decrement in ide_trim_bh_cb() */
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
+ &DEVICE(dev)->mem_reentrancy_guard);
iocb->ret = 0;
iocb->qiov = qiov;
iocb->i = -1;
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
index 632c03779c..082c6980ad 100644
--- a/hw/misc/imx_rngc.c
+++ b/hw/misc/imx_rngc.c
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(sbd, &s->iomem);
sysbus_init_irq(sbd, &s->irq);
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
+ &dev->mem_reentrancy_guard);
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
+ &dev->mem_reentrancy_guard);
}
static void imx_rngc_reset(DeviceState *dev)
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index 43bb1f56ba..80a789f32b 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
{
DBDMAState *s = MAC_DBDMA(dev);
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
}
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 53e1c32643..447f669921 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
n->vqs[index].tx_vq =
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
virtio_net_handle_tx_bh);
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
+ &DEVICE(vdev)->mem_reentrancy_guard);
}
n->vqs[index].tx_waiting = 0;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index ac24eeb5ed..e5a468975e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
}
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
if (n->dbbuf_enabled) {
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
}
}
n->cq[cqid] = cq;
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index c485da792c..3de288b454 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
}
s->max_devices = MPTSAS_NUM_PORTS;
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
+ &DEVICE(dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
}
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index c97176110c..3c20b47ad0 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
AioContext *ctx = blk_get_aio_context(s->conf.blk);
/* The reference is dropped in scsi_dma_restart_bh.*/
object_ref(OBJECT(s));
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
qemu_bh_schedule(s->bh);
}
}
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index fa76696855..4de34536e9 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
}
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
/* override default SCSI bus hotplug-handler, with pvscsi's one */
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 88f99c05d5..f013ded91e 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
QTAILQ_INIT(&uas->results);
QTAILQ_INIT(&uas->requests);
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
+ &d->mem_reentrancy_guard);
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
index 8755e9cbb0..a0c4e782b2 100644
--- a/hw/usb/hcd-dwc2.c
+++ b/hw/usb/hcd-dwc2.c
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
s->fi = USB_FRMINTVL - 1;
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
+ &dev->mem_reentrancy_guard);
sysbus_init_irq(sbd, &s->irq);
}
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index d4da8dcb8d..c930c60921 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
}
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
+ &dev->mem_reentrancy_guard);
s->device = dev;
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 8ac1175ad2..77baaa7a6b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
}
}
- s->bh = qemu_bh_new(uhci_bh, s);
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
s->num_ports_vmstate = NB_PORTS;
QTAILQ_INIT(&s->queues);
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 176868d345..f500db85ab 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
static void usb_host_nodev(USBHostDevice *s)
{
if (!s->bh_nodev) {
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
}
qemu_bh_schedule(s->bh_nodev);
}
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
USBHostDevice *dev = opaque;
if (!dev->bh_postld) {
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
}
qemu_bh_schedule(dev->bh_postld);
dev->bh_postld_pending = true;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index fd7df599bc..39fbaaab16 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
}
}
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 66cb3f7c24..38ee660a30 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
QTAILQ_INIT(&usbif->req_free_q);
QSIMPLEQ_INIT(&usbif->hotplug_q);
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
+ &DEVICE(xendev)->mem_reentrancy_guard);
}
static int usbback_free(struct XenLegacyDevice *xendev)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 43092aa634..5186e831dd 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
precopy_add_notifier(&s->free_page_hint_notify);
object_ref(OBJECT(s->iothread));
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
- virtio_ballloon_get_free_page_hints, s);
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
+ virtio_ballloon_get_free_page_hints, s,
+ &dev->mem_reentrancy_guard);
}
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 802e1b9659..2fe804510f 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
vcrypto->vqs[i].dataq =
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
vcrypto->vqs[i].dataq_bh =
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
+ &dev->mem_reentrancy_guard);
vcrypto->vqs[i].vcrypto = vcrypto;
}
--
2.39.3

@ -1,141 +0,0 @@
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 29 May 2023 14:21:08 -0400
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
controller (CVE-2023-0330)
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit b987718bbb1d0eabf95499b976212dd5f0120d75
Author: Thomas Huth <thuth@redhat.com>
Date: Mon May 22 11:10:11 2023 +0200
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
We cannot use the generic reentrancy guard in the LSI code, so
we have to manually prevent endless reentrancy here. The problematic
lsi_execute_script() function has already a way to detect whether
too many instructions have been executed - we just have to slightly
change the logic here that it also takes into account if the function
has been called too often in a reentrant way.
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
patch by Mauro Matteo Cascella.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 048436352b..f7d45b0b20 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
uint32_t addr, addr_high;
int opcode;
int insn_processed = 0;
+ static int reentrancy_level;
+
+ reentrancy_level++;
s->istat1 |= LSI_ISTAT1_SRUN;
again:
- if (++insn_processed > LSI_MAX_INSN) {
- /* Some windows drivers make the device spin waiting for a memory
- location to change. If we have been executed a lot of code then
- assume this is the case and force an unexpected device disconnect.
- This is apparently sufficient to beat the drivers into submission.
- */
+ /*
+ * Some windows drivers make the device spin waiting for a memory location
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
+ * assume this is the case and force an unexpected device disconnect. This
+ * is apparently sufficient to beat the drivers into submission.
+ *
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
+ * trigger itself again and again. Avoid this problem by stopping after
+ * being called multiple times in a reentrant way (8 is an arbitrary value
+ * which should be enough for all valid use cases).
+ */
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
if (!(s->sien0 & LSI_SIST0_UDC)) {
qemu_log_mask(LOG_GUEST_ERROR,
"lsi_scsi: inf. loop with UDC masked");
@@ -1596,6 +1605,8 @@ again:
}
}
trace_lsi_execute_script_stop();
+
+ reentrancy_level--;
}
static uint8_t lsi_reg_readb(LSIState *s, int offset)
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
index 2012bd54b7..1b55928b9f 100644
--- a/tests/qtest/fuzz-lsi53c895a-test.c
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
@@ -8,6 +8,36 @@
#include "qemu/osdep.h"
#include "libqtest.h"
+/*
+ * This used to trigger a DMA reentrancy issue
+ * leading to memory corruption bugs like stack
+ * overflow or use-after-free
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
+ */
+static void test_lsi_dma_reentrancy(void)
+{
+ QTestState *s;
+
+ s = qtest_init("-M q35 -m 512M -nodefaults "
+ "-blockdev driver=null-co,node-name=null0 "
+ "-device lsi53c810 -device scsi-cd,drive=null0");
+
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
+ qtest_writel(s, 0xff000000, 0xc0000024);
+ qtest_writel(s, 0xff000114, 0x00000080);
+ qtest_writel(s, 0xff00012c, 0xff000000);
+ qtest_writel(s, 0xff000004, 0xff000114);
+ qtest_writel(s, 0xff000008, 0xff100014);
+ qtest_writel(s, 0xff10002f, 0x000000ff);
+
+ qtest_quit(s);
+}
+
/*
* This used to trigger a UAF in lsi_do_msgout()
* https://gitlab.com/qemu-project/qemu/-/issues/972
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
test_lsi_do_msgout_cancel_req);
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
+ test_lsi_dma_reentrancy);
+
return g_test_run();
}
--
2.39.3

@ -0,0 +1,73 @@
From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Volker=20R=C3=BCmelin?= <vr_qemu@t-online.de>
Date: Fri, 29 Dec 2023 21:38:54 +0100
Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm)
Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
introduced a global VFIODevice list, but forgot to update the list
element field name when iterating over the new list. Change the code
to use the correct list element field.
Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/vfio/common.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0d4d8b8416..0b3352f2a9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -73,7 +73,7 @@ bool vfio_mig_active(void)
return false;
}
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
if (vbasedev->migration_blocker) {
return false;
}
@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void)
unsigned int device_num = 0;
bool all_support_p2p = true;
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
if (vbasedev->migration) {
device_num++;
@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque)
{
VFIODevice *vbasedev;
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
if (vbasedev->dev->realized) {
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
}
}
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
if (vbasedev->dev->realized && vbasedev->needs_reset) {
vbasedev->ops->vfio_hot_reset_multi(vbasedev);
}
--
2.39.3

@ -1,76 +0,0 @@
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Wed, 12 Jul 2023 17:46:57 +0200
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 179: vfio: live migration support
RH-Bugzilla: 2192818
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
Bugzilla: https://bugzilla.redhat.com/2192818
commit 0ddcb39c9357
Author: Alex Williamson <alex.williamson@redhat.com>
Date: Fri Jun 30 16:36:08 2023 -0600
hw/vfio/pci-quirks: Sanitize capability pointer
Coverity reports a tained scalar when traversing the capabilities
chain (CID 1516589). In practice I've never seen a device with a
chain so broken as to cause an issue, but it's also pretty easy to
sanitize.
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci-quirks.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 0ed2fcd531..f4ff836805 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
.set = set_nv_gpudirect_clique_id,
};
+static bool is_valid_std_cap_offset(uint8_t pos)
+{
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
+}
+
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
*/
ret = pread(vdev->vbasedev.fd, &tmp, 1,
vdev->config_offset + PCI_CAPABILITY_LIST);
- if (ret != 1 || !tmp) {
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
return -EINVAL;
}
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
d4_conflict = true;
}
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
- } while (tmp);
+ } while (is_valid_std_cap_offset(tmp));
if (!c8_conflict) {
pos = 0xC8;
--
2.39.3

@ -1,110 +0,0 @@
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Wed, 12 Jul 2023 17:46:57 +0200
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
GPUDirect Cliques
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 179: vfio: live migration support
RH-Bugzilla: 2192818
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
Bugzilla: https://bugzilla.redhat.com/2192818
commit f6b30c1984f7
Author: Alex Williamson <alex.williamson@redhat.com>
Date: Thu Jun 8 12:05:07 2023 -0600
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
previously reserved for use by hypervisors to implement the GPUDirect
Cliques capability. A revised specification provides an alternate
location. Add a config space walk to the quirk to check for conflicts,
allowing us to fall back to the new location or generate an error at the
quirk setup rather than when the real conflicting capability is added
should there be no available location.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index f0147a050a..0ed2fcd531 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
* +---------------------------------+---------------------------------+
*
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
+ *
+ * Specification for Turning and later GPU architectures:
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
*/
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
const char *name, void *opaque,
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
- int ret, pos = 0xC8;
+ int ret, pos;
+ bool c8_conflict = false, d4_conflict = false;
+ uint8_t tmp;
if (vdev->nv_gpudirect_clique == 0xFF) {
return 0;
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
return -EINVAL;
}
+ /*
+ * Per the updated specification above, it's recommended to use offset
+ * D4h for Turing and later GPU architectures due to a conflict of the
+ * MSI-X capability at C8h. We don't know how to determine the GPU
+ * architecture, instead we walk the capability chain to mark conflicts
+ * and choose one or error based on the result.
+ *
+ * NB. Cap list head in pdev->config is already cleared, read from device.
+ */
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
+ vdev->config_offset + PCI_CAPABILITY_LIST);
+ if (ret != 1 || !tmp) {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
+ return -EINVAL;
+ }
+
+ do {
+ if (tmp == 0xC8) {
+ c8_conflict = true;
+ } else if (tmp == 0xD4) {
+ d4_conflict = true;
+ }
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
+ } while (tmp);
+
+ if (!c8_conflict) {
+ pos = 0xC8;
+ } else if (!d4_conflict) {
+ pos = 0xD4;
+ } else {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
+ return -EINVAL;
+ }
+
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
if (ret < 0) {
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
--
2.39.3

@ -1,62 +0,0 @@
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 17 Jul 2023 18:21:26 +0200
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
virtio_iommu_handle_command()
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
RH-Bugzilla: 2229133
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
In the virtio_iommu_handle_command() when a PROBE request is handled,
output_size takes a value greater than the tail size and on a subsequent
iteration we can get a stack out-of-band access. Initialize the
output_size on each iteration.
The issue was found with ASAN. Credits to:
Yiming Tao(Zhejiang University)
Gaoning Pan(Zhejiang University)
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/virtio/virtio-iommu.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 421e2a944f..17ce630200 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
struct virtio_iommu_req_head head;
struct virtio_iommu_req_tail tail = {};
- size_t output_size = sizeof(tail), sz;
VirtQueueElement *elem;
unsigned int iov_cnt;
struct iovec *iov;
void *buf = NULL;
+ size_t sz;
for (;;) {
+ size_t output_size = sizeof(tail);
+
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
return;
--
2.39.3

@ -1,52 +0,0 @@
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 14:29:15 -0400
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
0x8000001F is set
RH-Author: Bandan Das <None>
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214839
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:30 2022 -0500
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
associated with fields being set.
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 839706b430..4ac3046313 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (sev_enabled()) {
*eax = 0x2;
*eax |= sev_es_enabled() ? 0x8 : 0;
- *ebx = sev_get_cbit_position();
- *ebx |= sev_get_reduced_phys_bits() << 6;
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
}
break;
default:
--
2.39.3

@ -1,77 +0,0 @@
From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 14:22:55 -0400
Subject: [PATCH 12/14] i386/sev: Update checks and information related to
reduced-phys-bits
RH-Author: Bandan Das <None>
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214839
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:29 2022 -0500
i386/sev: Update checks and information related to reduced-phys-bits
The value of the reduced-phys-bits parameter is propogated to the CPUID
information exposed to the guest. Update the current validation check to
account for the size of the CPUID field (6-bits), ensuring the value is
in the range of 1 to 63.
Maintain backward compatibility, to an extent, by allowing a value greater
than 1 (so that the previously documented value of 5 still works), but not
allowing anything over 63.
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/sev.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 859e06f6ad..fe2144c038 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
host_cbitpos = ebx & 0x3f;
+ /*
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
+ * register of CPUID 0x8000001F. No need to verify the range as the
+ * comparison against the host value accomplishes that.
+ */
if (host_cbitpos != sev->cbitpos) {
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
__func__, host_cbitpos, sev->cbitpos);
goto err;
}
- if (sev->reduced_phys_bits < 1) {
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
- " requested '%d'", __func__, sev->reduced_phys_bits);
+ /*
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
+ * is in the range of 1 to 63.
+ */
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
+ " it should be in the range of 1 to 63, requested '%d'",
+ __func__, sev->reduced_phys_bits);
goto err;
}
--
2.39.3

@ -0,0 +1,54 @@
From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Wed, 17 Jan 2024 21:08:59 +0100
Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init()
mis-assignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions
RH-Jira: RHEL-21570
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9)
JIRA: https://issues.redhat.com/browse/RHEL-21570
commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503
Author: Elen Avan <elen.avan@bk.ru>
Date: Fri Dec 22 22:17:21 2023 +0300
include/ui/rect.h: fix qemu_rect_init() mis-assignment
Signed-off-by: Elen Avan <elen.avan@bk.ru>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050
Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test"
Cc: qemu-stable@nongnu.org
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
include/ui/rect.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/ui/rect.h b/include/ui/rect.h
index 94898f92d0..68f05d78a8 100644
--- a/include/ui/rect.h
+++ b/include/ui/rect.h
@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect,
uint16_t width, uint16_t height)
{
rect->x = x;
- rect->y = x;
+ rect->y = y;
rect->width = width;
rect->height = height;
}
--
2.39.3

@ -1,144 +0,0 @@
From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 17 May 2023 17:28:34 +0200
Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
RH-Bugzilla: 2186725
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm)
This tests exercises graph locking, draining, and graph modifications
with AioContext switches a lot. Amongst others, it serves as a
regression test for bdrv_graph_wrlock() deadlocking because it is called
with a locked AioContext and for AioContext handling in the NBD server.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230517152834.277483-4-kwolf@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/iotests.py | 4 ++
.../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++--
.../tests/graph-changes-while-io.out | 4 +-
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 3e82c634cf..7073579a7d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
assert self._qmp is not None
return self._qmp.cmd(cmd, args)
+ def get_qmp(self) -> QEMUMonitorProtocol:
+ assert self._qmp is not None
+ return self._qmp
+
def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
index 7664f33689..750e7d4d38 100755
--- a/tests/qemu-iotests/tests/graph-changes-while-io
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
@@ -22,19 +22,19 @@
import os
from threading import Thread
import iotests
-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
- QemuStorageDaemon
+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
+ QMPTestCase, QemuStorageDaemon
top = os.path.join(iotests.test_dir, 'top.img')
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
-def do_qemu_img_bench() -> None:
+def do_qemu_img_bench(count: int = 2000000) -> None:
"""
Do some I/O requests on `nbd_sock`.
"""
- qemu_img('bench', '-f', 'raw', '-c', '2000000',
+ qemu_img('bench', '-f', 'raw', '-c', str(count),
f'nbd+unix:///node0?socket={nbd_sock}')
@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):
bench_thr.join()
+ def test_commit_while_io(self) -> None:
+ # Run qemu-img bench in the background
+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
+ bench_thr.start()
+
+ qemu_io('-c', 'write 0 64k', top)
+ qemu_io('-c', 'write 128k 64k', top)
+
+ result = self.qsd.qmp('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'overlay',
+ 'backing': None,
+ 'file': {
+ 'driver': 'file',
+ 'filename': top
+ }
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('blockdev-snapshot', {
+ 'node': 'node0',
+ 'overlay': 'overlay',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ # While qemu-img bench is running, repeatedly commit overlay to node0
+ while bench_thr.is_alive():
+ result = self.qsd.qmp('block-commit', {
+ 'job-id': 'job0',
+ 'device': 'overlay',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('block-job-cancel', {
+ 'device': 'job0',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ cancelled = False
+ while not cancelled:
+ for event in self.qsd.get_qmp().get_events(wait=10.0):
+ if event['event'] != 'JOB_STATUS_CHANGE':
+ continue
+ if event['data']['status'] == 'null':
+ cancelled = True
+
+ bench_thr.join()
+
if __name__ == '__main__':
# Format must support raw backing files
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
index ae1213e6f8..fbc63e62f8 100644
--- a/tests/qemu-iotests/tests/graph-changes-while-io.out
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -1,5 +1,5 @@
-.
+..
----------------------------------------------------------------------
-Ran 1 tests
+Ran 2 tests
OK
--
2.39.3

@ -1,132 +0,0 @@
From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 9 May 2023 15:41:33 +0200
Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm)
This tests that trying to resize an image with QMP block_resize doesn't
hang or otherwise fail when the image is attached to a device running in
an iothread.
This is a regression test for the recent fix that changed
qmp_block_resize, which is a coroutine based QMP handler, to avoid
calling no_coroutine_fns directly.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230509134133.373408-1-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++
tests/qemu-iotests/tests/iothreads-resize.out | 11 +++
2 files changed, 82 insertions(+)
create mode 100755 tests/qemu-iotests/tests/iothreads-resize
create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out
diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize
new file mode 100755
index 0000000000..36e4598c62
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-resize
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# group: rw auto quick
+#
+# Test resizing an image that is attached to a separate iothread
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+cd ..
+. ./common.rc
+. ./common.filter
+
+# Resizing images is only supported by a few block drivers
+_supported_fmt raw qcow2 qed
+_supported_proto file
+_require_devices virtio-scsi-pci
+
+size=64M
+_make_test_img $size
+
+qmp() {
+cat <<EOF
+{"execute":"qmp_capabilities"}
+{'execute': 'block_resize',
+ 'arguments': {'node-name': 'img', 'size': 134217728}}
+{"execute":"quit"}
+EOF
+}
+
+qmp | $QEMU -S -display none \
+ -drive if=none,format=$IMGFMT,file="$TEST_IMG",node-name=img \
+ -object iothread,id=t0 \
+ -device virtio-scsi-pci,iothread=t0 \
+ -device scsi-hd,drive=none0 \
+ -qmp stdio \
+ | _filter_qmp
+
+_img_info | _filter_img_info
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out
new file mode 100644
index 0000000000..2ca5a9d964
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-resize.out
@@ -0,0 +1,11 @@
+QA output created by iothreads-resize
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 128 MiB (134217728 bytes)
+*** done
--
2.39.1

@ -1,44 +0,0 @@
From 21d1d93abd48f613c18df7dacd986693da774175 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 11 May 2023 13:03:22 +0200
Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not
deprecated in RHEL
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm)
This is a downstream-only patch that is necessary because the default
CPU in RHEL is marked as deprecated. This makes test cases fail due to
the warning in the output:
qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'
Fixes: 318178778db60b6475d1484509bee136317156d3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/testenv.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 9a37ad9152..963514aab3 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str,
if self.qemu_prog.endswith(f'qemu-system-{suffix}'):
self.qemu_options += f' -machine {machine}'
+ if self.qemu_prog.endswith('qemu-system-x86_64'):
+ self.qemu_options += ' -cpu Nehalem'
+
# QEMU_DEFAULT_MACHINE
self.qemu_default_machine = get_default_machine(self.qemu_prog)
--
2.39.1

@ -0,0 +1,49 @@
From a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 18 Jan 2024 09:48:21 -0500
Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids()
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm)
Add a filter function for QMP responses that contain QEMU's
automatically generated node ids. The ids change between runs and must
be masked in the reference output.
The next commit will use this new function.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20240118144823.1497953-2-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
tests/qemu-iotests/iotests.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e5c5798c71..ea48af4a7b 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -651,6 +651,13 @@ def _filter(_key, value):
def filter_generated_node_ids(msg):
return re.sub("#block[0-9]+", "NODE_NAME", msg)
+def filter_qmp_generated_node_ids(qmsg):
+ def _filter(_key, value):
+ if is_str(value):
+ return filter_generated_node_ids(value)
+ return value
+ return filter_qmp(qmsg, _filter)
+
def filter_img_info(output: str, filename: str,
drop_child_info: bool = True) -> str:
lines = []
--
2.39.3

@ -0,0 +1,49 @@
From 453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 25 Jan 2024 16:21:50 +0100
Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm)
Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but
in older versions it's not. We really have to catch asyncio.TimeoutError
here, otherwise a slow test run will fail (as has happened multiple
times on CI recently).
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20240125152150.42389-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
tests/qemu-iotests/tests/iothreads-stream | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream
index 503f221f16..231195b5e8 100755
--- a/tests/qemu-iotests/tests/iothreads-stream
+++ b/tests/qemu-iotests/tests/iothreads-stream
@@ -18,6 +18,7 @@
#
# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+import asyncio
import iotests
iotests.script_initialize(supported_fmts=['qcow2'],
@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \
# The test is done once both jobs are gone
if finished == 2:
break
- except TimeoutError:
+ except asyncio.TimeoutError:
pass
vm.cmd('query-jobs')
--
2.39.3

@ -1,186 +0,0 @@
From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Tue, 11 Apr 2023 19:34:18 +0200
Subject: [PATCH 4/9] iotests/iov-padding: New test
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2174676
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s)
Test that even vectored IO requests with 1024 vector elements that are
not aligned to the device's request alignment will succeed.
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
2 files changed, 144 insertions(+)
create mode 100755 tests/qemu-iotests/tests/iov-padding
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
new file mode 100755
index 0000000000..b9604900c7
--- /dev/null
+++ b/tests/qemu-iotests/tests/iov-padding
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+# group: rw quick
+#
+# Check the interaction of request padding (to fit alignment restrictions) with
+# vectored I/O from the guest
+#
+# Copyright Red Hat
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq=$(basename $0)
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+cd ..
+. ./common.rc
+. ./common.filter
+
+_supported_fmt raw
+_supported_proto file
+
+_make_test_img 1M
+
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
+
+# Four combinations:
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
+for start_offset in 4096 512; do
+ for last_element_length in 512 4096; do
+ length=$((1023 * 512 + $last_element_length))
+
+ echo
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
+
+ # Fill with data for testing
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
+
+ # 1023 512-byte buffers, and then one with length $last_element_length
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
+ -c "writev $cmd_params" \
+ --image-opts \
+ "$IMGSPEC" \
+ | _filter_qemu_io
+
+ # Read all patterns -- read the part we just wrote with writev twice,
+ # once "normally", and once with a readv, so we see that that works, too
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
+ -c "read -P 1 0 $start_offset" \
+ -c "read -P 2 $start_offset $length" \
+ -c "readv $cmd_params" \
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
+ --image-opts \
+ "$IMGSPEC" \
+ | _filter_qemu_io
+ done
+done
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
new file mode 100644
index 0000000000..e07a91fac7
--- /dev/null
+++ b/tests/qemu-iotests/tests/iov-padding.out
@@ -0,0 +1,59 @@
+QA output created by iov-padding
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 524288/524288 bytes at offset 4096
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 4096
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 4096
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 520192/520192 bytes at offset 528384
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 527872/527872 bytes at offset 4096
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 527872/527872 bytes at offset 4096
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 527872/527872 bytes at offset 4096
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 516608/516608 bytes at offset 531968
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 524288/524288 bytes at offset 512
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 512
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 512
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 523776/523776 bytes at offset 524800
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 527872/527872 bytes at offset 512
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 527872/527872 bytes at offset 512
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 527872/527872 bytes at offset 512
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 520192/520192 bytes at offset 528384
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
--
2.39.3

@ -0,0 +1,592 @@
From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 18 Jan 2024 09:48:22 -0500
Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 (stefanha/centos-stream-qemu-kvm)
The common.qemu bash functions allow tests to interact with the QMP
monitor of a QEMU process. I spent two days trying to update 141 when
the order of the test output changed, but found it would still fail
occassionally because printf() and QMP events race with synchronous QMP
communication.
I gave up and ported 141 to the existing Python API for QMP tests. The
Python API is less affected by the order in which QEMU prints output
because it does not print all QMP traffic by default.
The next commit changes the order in which QMP messages are received.
Make 141 reliable first.
Cc: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20240118144823.1497953-3-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
tests/qemu-iotests/141 | 307 ++++++++++++++++---------------------
tests/qemu-iotests/141.out | 200 ++++++------------------
2 files changed, 176 insertions(+), 331 deletions(-)
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
index a37030ee17..a7d3985a02 100755
--- a/tests/qemu-iotests/141
+++ b/tests/qemu-iotests/141
@@ -1,9 +1,12 @@
-#!/usr/bin/env bash
+#!/usr/bin/env python3
# group: rw auto quick
#
# Test case for ejecting BDSs with block jobs still running on them
#
-# Copyright (C) 2016 Red Hat, Inc.
+# Originally written in bash by Hanna Czenczek, ported to Python by Stefan
+# Hajnoczi.
+#
+# Copyright Red Hat
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -19,177 +22,129 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-# creator
-owner=hreitz@redhat.com
-
-seq="$(basename $0)"
-echo "QA output created by $seq"
-
-status=1 # failure is the default!
-
-_cleanup()
-{
- _cleanup_qemu
- _cleanup_test_img
- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do
- _rm_test_img "$img"
- done
-}
-trap "_cleanup; exit \$status" 0 1 2 3 15
-
-# get standard environment, filters and checks
-. ./common.rc
-. ./common.filter
-. ./common.qemu
-
-# Needs backing file and backing format support
-_supported_fmt qcow2 qed
-_supported_proto file
-_supported_os Linux
-
-
-test_blockjob()
-{
- _send_qemu_cmd $QEMU_HANDLE \
- "{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': '$IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': '$TEST_IMG'
- }}}" \
- 'return'
-
- # If "$2" is an event, we may or may not see it before the
- # {"return": {}}. Therefore, filter the {"return": {}} out both
- # here and in the next command. (Naturally, if we do not see it
- # here, we will see it before the next command can be executed,
- # so it will appear in the next _send_qemu_cmd's output.)
- _send_qemu_cmd $QEMU_HANDLE \
- "$1" \
- "$2" \
- | _filter_img_create | _filter_qmp_empty_return
-
- # We want this to return an error because the block job is still running
- _send_qemu_cmd $QEMU_HANDLE \
- "{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}" \
- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return
-
- _send_qemu_cmd $QEMU_HANDLE \
- "{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}" \
- "$3"
-
- _send_qemu_cmd $QEMU_HANDLE \
- "{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}" \
- 'return'
-}
-
-
-TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M
-TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M
-_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT
-
-_launch_qemu -nodefaults
-
-_send_qemu_cmd $QEMU_HANDLE \
- "{'execute': 'qmp_capabilities'}" \
- 'return'
-
-echo
-echo '=== Testing drive-backup ==='
-echo
-
-# drive-backup will not send BLOCK_JOB_READY by itself, and cancelling the job
-# will consequently result in BLOCK_JOB_CANCELLED being emitted.
-
-test_blockjob \
- "{'execute': 'drive-backup',
- 'arguments': {'job-id': 'job0',
- 'device': 'drv0',
- 'target': '$TEST_DIR/o.$IMGFMT',
- 'format': '$IMGFMT',
- 'sync': 'none'}}" \
- 'return' \
- '"status": "null"'
-
-echo
-echo '=== Testing drive-mirror ==='
-echo
-
-# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling
-# the job will consequently result in BLOCK_JOB_COMPLETED being emitted.
-
-test_blockjob \
- "{'execute': 'drive-mirror',
- 'arguments': {'job-id': 'job0',
- 'device': 'drv0',
- 'target': '$TEST_DIR/o.$IMGFMT',
- 'format': '$IMGFMT',
- 'sync': 'none'}}" \
- 'BLOCK_JOB_READY' \
- '"status": "null"'
-
-echo
-echo '=== Testing active block-commit ==='
-echo
-
-# An active block-commit will send BLOCK_JOB_READY basically immediately, and
-# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being
-# emitted.
-
-test_blockjob \
- "{'execute': 'block-commit',
- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \
- 'BLOCK_JOB_READY' \
- '"status": "null"'
-
-echo
-echo '=== Testing non-active block-commit ==='
-echo
-
-# Give block-commit something to work on, otherwise it would be done
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
-# fine without the block job still running.
-
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io
-
-test_blockjob \
- "{'execute': 'block-commit',
- 'arguments': {'job-id': 'job0',
- 'device': 'drv0',
- 'top': '$TEST_DIR/m.$IMGFMT',
- 'speed': 1}}" \
- 'return' \
- '"status": "null"'
-
-echo
-echo '=== Testing block-stream ==='
-echo
-
-# Give block-stream something to work on, otherwise it would be done
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
-# fine without the block job still running.
-
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io
-
-# With some data to stream (and @speed set to 1), block-stream will not complete
-# until we send the block-job-cancel command.
-
-test_blockjob \
- "{'execute': 'block-stream',
- 'arguments': {'job-id': 'job0',
- 'device': 'drv0',
- 'speed': 1}}" \
- 'return' \
- '"status": "null"'
-
-_cleanup_qemu
-
-# success, all done
-echo "*** done"
-rm -f $seq.full
-status=0
+import iotests
+
+# Common filters to mask values that vary in the test output
+QMP_FILTERS = [iotests.filter_qmp_testfiles, \
+ iotests.filter_qmp_imgfmt]
+
+
+class TestCase:
+ def __init__(self, name, vm, image_path, cancel_event):
+ self.name = name
+ self.vm = vm
+ self.image_path = image_path
+ self.cancel_event = cancel_event
+
+ def __enter__(self):
+ iotests.log(f'=== Testing {self.name} ===')
+ self.vm.qmp_log('blockdev-add', \
+ node_name='drv0', \
+ driver=iotests.imgfmt, \
+ file={'driver': 'file', 'filename': self.image_path}, \
+ filters=QMP_FILTERS)
+
+ def __exit__(self, *exc_details):
+ # This is expected to fail because the job still exists
+ self.vm.qmp_log('blockdev-del', node_name='drv0', \
+ filters=[iotests.filter_qmp_generated_node_ids])
+
+ self.vm.qmp_log('block-job-cancel', device='job0')
+ event = self.vm.event_wait(self.cancel_event)
+ iotests.log(event, filters=[iotests.filter_qmp_event])
+
+ # This time it succeeds
+ self.vm.qmp_log('blockdev-del', node_name='drv0')
+
+ # Separate test cases in output
+ iotests.log('')
+
+
+def main() -> None:
+ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \
+ (bottom_path, middle_path, top_path, target_path), \
+ iotests.VM() as vm:
+
+ iotests.log('Creating bottom <- middle <- top backing file chain...')
+ IMAGE_SIZE='1M'
+ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE)
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
+ '-F', iotests.imgfmt, \
+ '-b', bottom_path, \
+ middle_path, \
+ IMAGE_SIZE)
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
+ '-F', iotests.imgfmt, \
+ '-b', middle_path, \
+ top_path, \
+ IMAGE_SIZE)
+
+ iotests.log('Starting VM...')
+ vm.add_args('-nodefaults')
+ vm.launch()
+
+ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling
+ # the job will consequently result in BLOCK_JOB_CANCELLED being
+ # emitted.
+ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'):
+ vm.qmp_log('drive-backup', \
+ job_id='job0', \
+ device='drv0', \
+ target=target_path, \
+ format=iotests.imgfmt, \
+ sync='none', \
+ filters=QMP_FILTERS)
+
+ # drive-mirror will send BLOCK_JOB_READY basically immediately, and
+ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED
+ # being emitted.
+ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'):
+ vm.qmp_log('drive-mirror', \
+ job_id='job0', \
+ device='drv0', \
+ target=target_path, \
+ format=iotests.imgfmt, \
+ sync='none', \
+ filters=QMP_FILTERS)
+ event = vm.event_wait('BLOCK_JOB_READY')
+ assert event is not None # silence mypy
+ iotests.log(event, filters=[iotests.filter_qmp_event])
+
+ # An active block-commit will send BLOCK_JOB_READY basically
+ # immediately, and cancelling the job will consequently result in
+ # BLOCK_JOB_COMPLETED being emitted.
+ with TestCase('active block-commit', vm, top_path, \
+ 'BLOCK_JOB_COMPLETED'):
+ vm.qmp_log('block-commit', \
+ job_id='job0', \
+ device='drv0')
+ event = vm.event_wait('BLOCK_JOB_READY')
+ assert event is not None # silence mypy
+ iotests.log(event, filters=[iotests.filter_qmp_event])
+
+ # Give block-commit something to work on, otherwise it would be done
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
+ # work just fine without the block job still running.
+ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}')
+ with TestCase('non-active block-commit', vm, top_path, \
+ 'BLOCK_JOB_CANCELLED'):
+ vm.qmp_log('block-commit', \
+ job_id='job0', \
+ device='drv0', \
+ top=middle_path, \
+ speed=1, \
+ filters=[iotests.filter_qmp_testfiles])
+
+ # Give block-stream something to work on, otherwise it would be done
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
+ # work just fine without the block job still running.
+ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}')
+ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'):
+ vm.qmp_log('block-stream', \
+ job_id='job0', \
+ device='drv0', \
+ speed=1)
+
+if __name__ == '__main__':
+ iotests.script_main(main, supported_fmts=['qcow2', 'qed'],
+ supported_protocols=['file'])
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 63203d9944..91b7ba50af 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -1,179 +1,69 @@
-QA output created by 141
-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576
-Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT
-{'execute': 'qmp_capabilities'}
-{"return": {}}
-
+Creating bottom <- middle <- top backing file chain...
+Starting VM...
=== Testing drive-backup ===
-
-{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': 'IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': 'TEST_DIR/t.IMGFMT'
- }}}
-{"return": {}}
-{'execute': 'drive-backup',
-'arguments': {'job-id': 'job0',
-'device': 'drv0',
-'target': 'TEST_DIR/o.IMGFMT',
-'format': 'IMGFMT',
-'sync': 'none'}}
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
+{"return": {}}
+{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
+{"return": {}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
-{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"return": {}}
=== Testing drive-mirror ===
-
-{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': 'IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': 'TEST_DIR/t.IMGFMT'
- }}}
-{"return": {}}
-{'execute': 'drive-mirror',
-'arguments': {'job-id': 'job0',
-'device': 'drv0',
-'target': 'TEST_DIR/o.IMGFMT',
-'format': 'IMGFMT',
-'sync': 'none'}}
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
+{"return": {}}
+{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
+{"return": {}}
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}}
-{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"return": {}}
=== Testing active block-commit ===
-
-{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': 'IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': 'TEST_DIR/t.IMGFMT'
- }}}
-{"return": {}}
-{'execute': 'block-commit',
-'arguments': {'job-id': 'job0', 'device': 'drv0'}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
+{"return": {}}
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}}
+{"return": {}}
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
-{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"return": {}}
=== Testing non-active block-commit ===
-
-wrote 1048576/1048576 bytes at offset 0
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': 'IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': 'TEST_DIR/t.IMGFMT'
- }}}
-{"return": {}}
-{'execute': 'block-commit',
-'arguments': {'job-id': 'job0',
-'device': 'drv0',
-'top': 'TEST_DIR/m.IMGFMT',
-'speed': 1}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
+{"return": {}}
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}}
+{"return": {}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
-{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"return": {}}
=== Testing block-stream ===
-
-wrote 1048576/1048576 bytes at offset 0
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-{'execute': 'blockdev-add',
- 'arguments': {
- 'node-name': 'drv0',
- 'driver': 'IMGFMT',
- 'file': {
- 'driver': 'file',
- 'filename': 'TEST_DIR/t.IMGFMT'
- }}}
-{"return": {}}
-{'execute': 'block-stream',
-'arguments': {'job-id': 'job0',
-'device': 'drv0',
-'speed': 1}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
+{"return": {}}
+{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}}
+{"return": {}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}}
-{'execute': 'block-job-cancel',
- 'arguments': {'device': 'job0'}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{'execute': 'blockdev-del',
- 'arguments': {'node-name': 'drv0'}}
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
{"return": {}}
-*** done
+
--
2.39.3

@ -0,0 +1,105 @@
From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 5 Dec 2023 13:20:10 -0500
Subject: [PATCH 093/101] job: remove outdated AioContext locking comments
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 214: Remove AioContext lock
RH-Jira: RHEL-15965
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm)
The AioContext lock no longer exists.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231205182011.1976568-14-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/qemu/job.h | 20 --------------------
1 file changed, 20 deletions(-)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index e502787dd8..9ea98b5927 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -67,8 +67,6 @@ typedef struct Job {
/**
* The completion function that will be called when the job completes.
- * Called with AioContext lock held, since many callback implementations
- * use bdrv_* functions that require to hold the lock.
*/
BlockCompletionFunc *cb;
@@ -264,9 +262,6 @@ struct JobDriver {
*
* This callback will not be invoked if the job has already failed.
* If it fails, abort and then clean will be called.
- *
- * Called with AioContext lock held, since many callbacs implementations
- * use bdrv_* functions that require to hold the lock.
*/
int (*prepare)(Job *job);
@@ -277,9 +272,6 @@ struct JobDriver {
*
* All jobs will complete with a call to either .commit() or .abort() but
* never both.
- *
- * Called with AioContext lock held, since many callback implementations
- * use bdrv_* functions that require to hold the lock.
*/
void (*commit)(Job *job);
@@ -290,9 +282,6 @@ struct JobDriver {
*
* All jobs will complete with a call to either .commit() or .abort() but
* never both.
- *
- * Called with AioContext lock held, since many callback implementations
- * use bdrv_* functions that require to hold the lock.
*/
void (*abort)(Job *job);
@@ -301,9 +290,6 @@ struct JobDriver {
* .commit() or .abort(). Regardless of which callback is invoked after
* completion, .clean() will always be called, even if the job does not
* belong to a transaction group.
- *
- * Called with AioContext lock held, since many callbacs implementations
- * use bdrv_* functions that require to hold the lock.
*/
void (*clean)(Job *job);
@@ -318,17 +304,12 @@ struct JobDriver {
* READY).
* (If the callback is NULL, the job is assumed to terminate
* without I/O.)
- *
- * Called with AioContext lock held, since many callback implementations
- * use bdrv_* functions that require to hold the lock.
*/
bool (*cancel)(Job *job, bool force);
/**
* Called when the job is freed.
- * Called with AioContext lock held, since many callback implementations
- * use bdrv_* functions that require to hold the lock.
*/
void (*free)(Job *job);
};
@@ -424,7 +405,6 @@ void job_ref_locked(Job *job);
* Release a reference that was previously acquired with job_ref_locked() or
* job_create(). If it's the last reference to the object, it will be freed.
*
- * Takes AioContext lock internally to invoke a job->driver callback.
* Called with job lock held.
*/
void job_unref_locked(Job *job);
--
2.39.3

@ -0,0 +1,42 @@
From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Tue, 21 Nov 2023 16:44:19 +0800
Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 211: IOMMUFD backend backport
RH-Jira: RHEL-19302 RHEL-21057
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/s390x/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
index 4c068d7960..26ad104485 100644
--- a/hw/s390x/Kconfig
+++ b/hw/s390x/Kconfig
@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO
imply VFIO_CCW
imply WDT_DIAG288
imply PCIE_DEVICES
+ imply IOMMUFD
select PCI_EXPRESS
select S390_FLIC
select S390_FLIC_KVM if KVM
--
2.39.3

@ -1,160 +0,0 @@
From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 29 Jun 2023 14:48:32 -0300
Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Marcelo Tosatti <None>
RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption
RH-Bugzilla: 2218644
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644
Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d
A regression has been detected in latency testing of KVM guests.
More specifically, it was observed that the cyclictest
numbers inside of an isolated vcpu (running on isolated pcpu) are:
Where a maximum of 50us is acceptable.
The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
per vcpu statistics, which interrupts the vcpu (and is unnecessary).
To fix this, open the per vcpu stats fd on vcpu initialization,
and read from that fd from QEMU's main thread.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 30 +++++++++++++++---------------
include/hw/core/cpu.h | 1 +
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index cf3a88d90e..fa7ca46c66 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
"kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
kvm_arch_vcpu_id(cpu));
}
+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+
err:
return ret;
}
@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
/* Read stats header */
kvm_stats_header = &descriptors->kvm_stats_header;
- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
+ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
if (ret != sizeof(*kvm_stats_header)) {
error_setg(errp, "KVM stats: failed to read stats header: "
"expected %zu actual %zu",
@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
}
static void query_stats(StatsResultList **result, StatsTarget target,
- strList *names, int stats_fd, Error **errp)
+ strList *names, int stats_fd, CPUState *cpu,
+ Error **errp)
{
struct kvm_stats_desc *kvm_stats_desc;
struct kvm_stats_header *kvm_stats_header;
@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
break;
case STATS_TARGET_VCPU:
add_stats_entry(result, STATS_PROVIDER_KVM,
- current_cpu->parent_obj.canonical_path,
+ cpu->parent_obj.canonical_path,
stats_list);
break;
default:
@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
}
-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
{
- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+ int stats_fd = cpu->kvm_vcpu_stats_fd;
Error *local_err = NULL;
if (stats_fd == -1) {
@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
return;
}
query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
- kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
- close(stats_fd);
+ kvm_stats_args->names, stats_fd, cpu,
+ kvm_stats_args->errp);
}
-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
{
- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+ int stats_fd = cpu->kvm_vcpu_stats_fd;
Error *local_err = NULL;
if (stats_fd == -1) {
@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
}
query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
kvm_stats_args->errp);
- close(stats_fd);
}
static void query_stats_cb(StatsResultList **result, StatsTarget target,
@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
error_setg_errno(errp, errno, "KVM stats: ioctl failed");
return;
}
- query_stats(result, target, names, stats_fd, errp);
+ query_stats(result, target, names, stats_fd, NULL, errp);
close(stats_fd);
break;
}
@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
continue;
}
- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+ query_stats_vcpu(cpu, &stats_args);
}
break;
}
@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
if (first_cpu) {
stats_args.result.schema = result;
stats_args.errp = errp;
- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+ query_stats_schema_vcpu(first_cpu, &stats_args);
}
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 397fd3ac68..ae96be07e7 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -399,6 +399,7 @@ struct CPUState {
struct kvm_dirty_gfn *kvm_dirty_gfns;
uint32_t kvm_fetch_index;
uint64_t dirty_pages;
+ int kvm_vcpu_stats_fd;
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;
--
2.39.3

@ -1,53 +0,0 @@
From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 6d0589e0e6c64b888864a2bf980537be20389264
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Sat May 6 07:21:45 2023 -0400
loongarch: mark loongarch_ipi_iocsr re-entrnacy safe
loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send
function. As such, mark these MRs re-entrancy-safe.
Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues")
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230506112145.3563708-1-alxndr@bu.edu>
Signed-off-by: Song Gao <gaosong@loongson.cn>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/intc/loongarch_ipi.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index aa4bf9eb74..40e98af2ce 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj)
for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) {
memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops,
&lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48);
+
+ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */
+ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true;
+
sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]);
memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops,
--
2.39.3

@ -1,70 +0,0 @@
From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 29 May 2023 14:21:08 -0400
Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO
region, too
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e
Author: Thomas Huth <thuth@redhat.com>
Date: Tue May 16 11:05:56 2023 +0200
lsi53c895a: disable reentrancy detection for MMIO region, too
While trying to use a SCSI disk on the LSI controller with an
older version of Fedora (25), I'm getting:
qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34
and the SCSI controller is not usable. Seems like we have to
disable the reentrancy checker for the MMIO region, too, to
get this working again.
The problem could be reproduced it like this:
./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \
-device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \
-drive if=none,id=d0,file=.../somedisk.qcow2 \
-cdrom Fedora-Everything-netinst-i386-25-1.3.iso
Where somedisk.qcow2 is an image that contains already some partitions
and file systems.
In the boot menu of Fedora, go to
"Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell"
Then check "dmesg | grep -i 53c" for failure messages, and try to mount
a partition from somedisk.qcow2.
Message-Id: <20230516090556.553813-1-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/scsi/lsi53c895a.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index db27872963..048436352b 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
* re-entrancy guard.
*/
s->ram_io.disable_reentrancy_guard = true;
+ s->mmio_io.disable_reentrancy_guard = true;
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
qdev_init_gpio_out(d, &s->ext_irq, 1);
--
2.39.3

@ -1,58 +0,0 @@
From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:10 2023 -0400
lsi53c895a: disable reentrancy detection for script RAM
As the code is designed to use the memory APIs to access the script ram,
disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion.
In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion.
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-6-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/scsi/lsi53c895a.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index af93557a9a..db27872963 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s,
"lsi-io", 256);
+ /*
+ * Since we use the address-space API to interact with ram_io, disable the
+ * re-entrancy guard.
+ */
+ s->ram_io.disable_reentrancy_guard = true;
+
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
qdev_init_gpio_out(d, &s->ext_irq, 1);
--
2.39.3

@ -0,0 +1,112 @@
From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 17 Jan 2024 14:55:54 +0100
Subject: [PATCH 02/22] memory-device: reintroduce memory region size check
RH-Author: David Hildenbrand <david@redhat.com>
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
RH-Jira: RHEL-20341
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71
JIRA: https://issues.redhat.com/browse/RHEL-20341
We used to check that the memory region size is multiples of the overall
requested address alignment for the device memory address.
We removed that check, because there are cases (i.e., hv-balloon) where
devices unconditionally request an address alignment that has a very large
alignment (i.e., 32 GiB), but the actual memory device size might not be
multiples of that alignment.
However, this change:
(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte".
(b) allows for DIMMs that partially cover hugetlb pages, previously
reported in [1].
Both scenarios don't make any sense: we might even waste memory.
So let's reintroduce that check, but only check that the
memory region size is multiples of the memory region alignment (i.e.,
page size, huge page size), but not any additional memory device
requirements communicated using md->get_min_alignment().
The following examples now fail again as expected:
(a) 1M with 2M THP
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
-object memory-backend-ram,id=mem1,size=1M \
-device pc-dimm,id=dimm1,memdev=mem1
-> backend memory size must be multiple of 0x200000
(b) 1G+1byte
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
-object memory-backend-ram,id=mem1,size=1073741825B \
-device pc-dimm,id=dimm1,memdev=mem1
-> backend memory size must be multiple of 0x200000
(c) Unliagned hugetlb size (2M)
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
-object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \
-device pc-dimm,id=dimm1,memdev=mem1
backend memory size must be multiple of 0x200000
(d) Unliagned hugetlb size (1G)
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
-object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \
-device pc-dimm,id=dimm1,memdev=mem1
-> backend memory size must be multiple of 0x40000000
Note that this fix depends on a hv-balloon change to communicate its
additional alignment requirements using get_min_alignment() instead of
through the memory region.
[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com
Message-ID: <20240117135554.787344-3-david@redhat.com>
Reported-by: Zhenyu Zhang <zhenyzha@redhat.com>
Reported-by: Michal Privoznik <mprivozn@redhat.com>
Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check")
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Tested-by: Mario Casquero <mcasquer@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4)
Signed-off-by: David Hildenbrand <david@redhat.com>
---
hw/mem/memory-device.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index a1b1af26bc..e098585cda 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
goto out;
}
+ /*
+ * We always want the memory region size to be multiples of the memory
+ * region alignment: for example, DIMMs with 1G+1byte size don't make
+ * any sense. Note that we don't check that the size is multiples
+ * of any additional alignment requirements the memory device might
+ * have when it comes to the address in physical address space.
+ */
+ if (!QEMU_IS_ALIGNED(memory_region_size(mr),
+ memory_region_get_alignment(mr))) {
+ error_setg(errp, "backend memory size must be multiple of 0x%"
+ PRIx64, memory_region_get_alignment(mr));
+ return;
+ }
+
if (legacy_align) {
align = *legacy_align;
} else {
--
2.39.3

@ -1,150 +0,0 @@
From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 05/21] memory: prevent dma-reentracy issues
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
CVE: CVE-2023-0330
commit a2e1753b8054344f32cf94f31c6399a58794a380
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:06 2023 -0400
memory: prevent dma-reentracy issues
Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
This flag is set/checked prior to calling a device's MemoryRegion
handlers, and set when device code initiates DMA. The purpose of this
flag is to prevent two types of DMA-based reentrancy issues:
1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case
These issues have led to problems such as stack-exhaustion and
use-after-frees.
Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
Resolves: CVE-2023-0330
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-2-alxndr@bu.edu>
[thuth: Replace warn_report() with warn_report_once()]
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
include/exec/memory.h | 5 +++++
include/hw/qdev-core.h | 7 +++++++
softmmu/memory.c | 16 ++++++++++++++++
3 files changed, 28 insertions(+)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 15ade918ba..e45ce6061f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -767,6 +767,8 @@ struct MemoryRegion {
bool is_iommu;
RAMBlock *ram_block;
Object *owner;
+ /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
+ DeviceState *dev;
const MemoryRegionOps *ops;
void *opaque;
@@ -791,6 +793,9 @@ struct MemoryRegion {
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
RamDiscardManager *rdm; /* Only for RAM */
+
+ /* For devices designed to perform re-entrant IO into their own IO MRs */
+ bool disable_reentrancy_guard;
};
struct IOMMUMemoryRegion {
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index bd50ad5ee1..7623703943 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -162,6 +162,10 @@ struct NamedClockList {
QLIST_ENTRY(NamedClockList) node;
};
+typedef struct {
+ bool engaged_in_io;
+} MemReentrancyGuard;
+
/**
* DeviceState:
* @realized: Indicates whether the device has been fully constructed.
@@ -194,6 +198,9 @@ struct DeviceState {
int alias_required_for_version;
ResettableState reset;
GSList *unplug_blockers;
+
+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
+ MemReentrancyGuard mem_reentrancy_guard;
};
struct DeviceListener {
diff --git a/softmmu/memory.c b/softmmu/memory.c
index b1a6cae6f5..b7b3386e9d 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_size_max = 4;
}
+ /* Do not allow more than one simultaneous access to a device's IO Regions */
+ if (mr->dev && !mr->disable_reentrancy_guard &&
+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
+ warn_report_once("Blocked re-entrant IO on MemoryRegion: "
+ "%s at addr: 0x%" HWADDR_PRIX,
+ memory_region_name(mr), addr);
+ return MEMTX_ACCESS_ERROR;
+ }
+ mr->dev->mem_reentrancy_guard.engaged_in_io = true;
+ }
+
/* FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_mask, attrs);
}
}
+ if (mr->dev) {
+ mr->dev->mem_reentrancy_guard.engaged_in_io = false;
+ }
return r;
}
@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr,
}
mr->name = g_strdup(name);
mr->owner = owner;
+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
mr->ram_block = NULL;
if (name) {
--
2.39.3

@ -1,67 +0,0 @@
From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 7 Jun 2023 11:45:09 -0400
Subject: [PATCH 15/21] memory: stricter checks prior to unsetting
engaged_in_io
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Tue May 16 04:40:02 2023 -0400
memory: stricter checks prior to unsetting engaged_in_io
engaged_in_io could be unset by an MR with re-entrancy checks disabled.
Ensure that only MRs that can set the engaged_in_io flag can unset it.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Message-Id: <20230516084002.3813836-1-alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
softmmu/memory.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/softmmu/memory.c b/softmmu/memory.c
index b7b3386e9d..26424f1d78 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
unsigned access_size;
unsigned i;
MemTxResult r = MEMTX_OK;
+ bool reentrancy_guard_applied = false;
if (!access_size_min) {
access_size_min = 1;
@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
return MEMTX_ACCESS_ERROR;
}
mr->dev->mem_reentrancy_guard.engaged_in_io = true;
+ reentrancy_guard_applied = true;
}
/* FIXME: support unaligned access? */
@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_mask, attrs);
}
}
- if (mr->dev) {
+ if (mr->dev && reentrancy_guard_applied) {
mr->dev->mem_reentrancy_guard.engaged_in_io = false;
}
return r;
--
2.39.3

@ -1,162 +0,0 @@
From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Wed, 12 Jul 2023 17:46:57 +0200
Subject: [PATCH 10/37] migration: Add switchover ack capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 179: vfio: live migration support
RH-Bugzilla: 2192818
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s)
Bugzilla: https://bugzilla.redhat.com/2192818
commit 6574232fff6a
Author: Avihai Horon <avihaih@nvidia.com>
Date: Wed Jun 21 14:11:54 2023 +0300
migration: Add switchover ack capability
Migration downtime estimation is calculated based on bandwidth and
remaining migration data. This assumes that loading of migration data in
the destination takes a negligible amount of time and that downtime
depends only on network speed.
While this may be true for RAM, it's not necessarily true for other
migrated devices. For example, loading the data of a VFIO device in the
destination might require from the device to allocate resources, prepare
internal data structures and so on. These operations can take a
significant amount of time which can increase migration downtime.
This patch adds a new capability "switchover ack" that prevents the
source from stopping the VM and completing the migration until an ACK
is received from the destination that it's OK to do so.
This can be used by migrated devices in various ways to reduce downtime.
For example, a device can send initial precopy metadata to pre-allocate
resources in the destination and use this capability to make sure that
the pre-allocation is completed before the source VM is stopped, so it
will have full effect.
This new capability relies on the return path capability to communicate
from the destination back to the source.
The actual implementation of the capability will be added in the
following patches.
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Tested-by: YangHang Liu <yanghliu@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Conflicts:
- qapi/migration.json
re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1:
unexpected de-indent (expected at least 17 spaces)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
migration/options.c | 21 +++++++++++++++++++++
migration/options.h | 1 +
qapi/migration.json | 14 +++++++++++++-
3 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/migration/options.c b/migration/options.c
index a76984276d..c3df6c6dde 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -182,6 +182,8 @@ Property migration_properties[] = {
DEFINE_PROP_MIG_CAP("x-zero-copy-send",
MIGRATION_CAPABILITY_ZERO_COPY_SEND),
#endif
+ DEFINE_PROP_MIG_CAP("x-switchover-ack",
+ MIGRATION_CAPABILITY_SWITCHOVER_ACK),
DEFINE_PROP_END_OF_LIST(),
};
@@ -305,6 +307,13 @@ bool migrate_return_path(void)
return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
}
+bool migrate_switchover_ack(void)
+{
+ MigrationState *s = migrate_get_current();
+
+ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK];
+}
+
bool migrate_validate_uuid(void)
{
MigrationState *s = migrate_get_current();
@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
}
}
+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) {
+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
+ error_setg(errp, "Capability 'switchover-ack' requires capability "
+ "'return-path'");
+ return false;
+ }
+
+ /* Disable this capability until it's implemented */
+ error_setg(errp, "'switchover-ack' is not implemented yet");
+ return false;
+ }
+
return true;
}
diff --git a/migration/options.h b/migration/options.h
index 7b0f7245ad..0fc7be6869 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void);
bool migrate_rdma_pin_all(void);
bool migrate_release_ram(void);
bool migrate_return_path(void);
+bool migrate_switchover_ack(void);
bool migrate_validate_uuid(void);
bool migrate_xbzrle(void);
bool migrate_zero_blocks(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index 2c35b7b9cf..b6a58347cc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -478,6 +478,18 @@
# should not affect the correctness of postcopy migration.
# (since 7.1)
#
+# @switchover-ack: If enabled, migration will not stop the source VM
+# and complete the migration until an ACK is received
+# from the destination that it's OK to do so.
+# Exactly when this ACK is sent depends on the
+# migrated devices that use this feature. For
+# example, a device can use it to make sure some of
+# its data is sent and loaded in the destination
+# before doing switchover. This can reduce downtime
+# if devices that support this capability are
+# present. 'return-path' capability must be enabled
+# to use it. (since 8.1)
+#
# Features:
# @unstable: Members @x-colo and @x-ignore-shared are experimental.
#
@@ -492,7 +504,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
- 'zero-copy-send', 'postcopy-preempt'] }
+ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] }
##
# @MigrationCapabilityStatus:
--
2.39.3

@ -1,308 +0,0 @@
From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 25 Apr 2023 21:15:14 -0400
Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to
report err
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types
RH-Bugzilla: 2057267
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm)
Instead of print it to STDERR, bring the error upwards so that it can be
reported via QMP responses.
E.g.:
{ "execute": "migrate-set-capabilities" ,
"arguments": { "capabilities":
[ { "capability": "postcopy-ram", "state": true } ] } }
{ "error":
{ "class": "GenericError",
"desc": "Postcopy is not supported: Host backend files need to be TMPFS
or HUGETLBFS only" } }
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/options.c | 8 ++----
migration/postcopy-ram.c | 60 +++++++++++++++++++++-------------------
migration/postcopy-ram.h | 3 +-
migration/savevm.c | 3 +-
4 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/migration/options.c b/migration/options.c
index 4701c75a4d..e51d667e14 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
+ ERRP_GUARD();
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
if (new_caps[MIGRATION_CAPABILITY_BLOCK]) {
error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
*/
if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] &&
runstate_check(RUN_STATE_INMIGRATE) &&
- !postcopy_ram_supported_by_host(mis)) {
- /* postcopy_ram_supported_by_host will have emitted a more
- * detailed message
- */
- error_setg(errp, "Postcopy is not supported");
+ !postcopy_ram_supported_by_host(mis, errp)) {
+ error_prepend(errp, "Postcopy is not supported: ");
return false;
}
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0711500036..75aa276bb1 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features)
return true;
}
-static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis,
+ Error **errp)
{
uint64_t asked_features = 0;
static uint64_t supported_features;
+ ERRP_GUARD();
/*
* it's not possible to
* request UFFD_API twice per one fd
@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
*/
if (!supported_features) {
if (!receive_ufd_features(&supported_features)) {
- error_report("%s failed", __func__);
+ error_setg(errp, "Userfault feature detection failed");
return false;
}
}
@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
* userfault file descriptor
*/
if (!request_ufd_features(ufd, asked_features)) {
- error_report("%s failed: features %" PRIu64, __func__,
- asked_features);
+ error_setg(errp, "Failed features %" PRIu64, asked_features);
return false;
}
@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS;
#endif
if (!have_hp) {
- error_report("Userfault on this host does not support huge pages");
+ error_setg(errp,
+ "Userfault on this host does not support huge pages");
return false;
}
}
@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(RAMBlock *rb)
+static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp)
{
const char *block_name = qemu_ram_get_idstr(rb);
ram_addr_t length = qemu_ram_get_used_length(rb);
@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb)
QemuFsType fs;
if (length % pagesize) {
- error_report("Postcopy requires RAM blocks to be a page size multiple,"
- " block %s is 0x" RAM_ADDR_FMT " bytes with a "
- "page size of 0x%zx", block_name, length, pagesize);
+ error_setg(errp,
+ "Postcopy requires RAM blocks to be a page size multiple,"
+ " block %s is 0x" RAM_ADDR_FMT " bytes with a "
+ "page size of 0x%zx", block_name, length, pagesize);
return 1;
}
if (rb->fd >= 0) {
fs = qemu_fd_getfs(rb->fd);
if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) {
- error_report("Host backend files need to be TMPFS or HUGETLBFS only");
+ error_setg(errp,
+ "Host backend files need to be TMPFS or HUGETLBFS only");
return 1;
}
}
@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb)
* normally fine since if the postcopy succeeds it gets turned back on at the
* end.
*/
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
{
long pagesize = qemu_real_host_page_size();
int ufd = -1;
@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
struct uffdio_register reg_struct;
struct uffdio_range range_struct;
uint64_t feature_mask;
- Error *local_err = NULL;
RAMBlock *block;
+ ERRP_GUARD();
if (qemu_target_page_size() > pagesize) {
- error_report("Target page size bigger than host page size");
+ error_setg(errp, "Target page size bigger than host page size");
goto out;
}
ufd = uffd_open(O_CLOEXEC);
if (ufd == -1) {
- error_report("%s: userfaultfd not available: %s", __func__,
- strerror(errno));
+ error_setg(errp, "Userfaultfd not available: %s", strerror(errno));
goto out;
}
/* Give devices a chance to object */
- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) {
- error_report_err(local_err);
+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) {
goto out;
}
/* Version and features check */
- if (!ufd_check_and_apply(ufd, mis)) {
+ if (!ufd_check_and_apply(ufd, mis, errp)) {
goto out;
}
@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
* affect in reality, or we can revisit.
*/
RAMBLOCK_FOREACH(block) {
- if (test_ramblock_postcopiable(block)) {
+ if (test_ramblock_postcopiable(block, errp)) {
goto out;
}
}
@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
* it was enabled.
*/
if (munlockall()) {
- error_report("%s: munlockall: %s", __func__, strerror(errno));
+ error_setg(errp, "munlockall() failed: %s", strerror(errno));
goto out;
}
@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
if (testarea == MAP_FAILED) {
- error_report("%s: Failed to map test area: %s", __func__,
- strerror(errno));
+ error_setg(errp, "Failed to map test area: %s", strerror(errno));
goto out;
}
g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize));
@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
- error_report("%s userfault register: %s", __func__, strerror(errno));
+ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno));
goto out;
}
range_struct.start = (uintptr_t)testarea;
range_struct.len = pagesize;
if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
- error_report("%s userfault unregister: %s", __func__, strerror(errno));
+ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno));
goto out;
}
@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
(__u64)1 << _UFFDIO_COPY |
(__u64)1 << _UFFDIO_ZEROPAGE;
if ((reg_struct.ioctls & feature_mask) != feature_mask) {
- error_report("Missing userfault map features: %" PRIx64,
- (uint64_t)(~reg_struct.ioctls & feature_mask));
+ error_setg(errp, "Missing userfault map features: %" PRIx64,
+ (uint64_t)(~reg_struct.ioctls & feature_mask));
goto out;
}
@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis)
int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
+ Error *local_err = NULL;
+
/* Open the fd for the kernel to give us userfaults */
mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK);
if (mis->userfault_fd == -1) {
@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
* Although the host check already tested the API, we need to
* do the check again as an ABI handshake on the new fd.
*/
- if (!ufd_check_and_apply(mis->userfault_fd, mis)) {
+ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) {
+ error_report_err(local_err);
return -1;
}
@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
{
}
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
{
error_report("%s: No OS support", __func__);
return false;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index b4867a32d5..442ab89752 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -14,7 +14,8 @@
#define QEMU_POSTCOPY_RAM_H
/* Return true if the host supports everything we need to do postcopy-ram */
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
+ Error **errp);
/*
* Make all of RAM sensitive to accesses to areas that haven't yet been written
diff --git a/migration/savevm.c b/migration/savevm.c
index 9671211339..211eff3a8b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
return -EINVAL;
}
- if (!postcopy_ram_supported_by_host(mis)) {
+ if (!postcopy_ram_supported_by_host(mis, &local_err)) {
+ error_report_err(local_err);
postcopy_state_set(POSTCOPY_INCOMING_NONE);
return -1;
}
--
2.39.1

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save