import qemu-kvm-8.0.0-16.el9_3

i9 changed/i9c/qemu-kvm-8.0.0-16.el9_3
MSVSphere Packaging Team 1 year ago
parent de74d18eb7
commit 2c6097766c

2
.gitignore vendored

@ -1 +1 @@
SOURCES/qemu-7.2.0.tar.xz
SOURCES/qemu-8.0.0.tar.xz

@ -1 +1 @@
634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz

@ -1,4 +1,4 @@
From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001
From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 26 May 2021 10:56:02 +0200
Subject: Initial redhat build
@ -13,7 +13,7 @@ several issues are fixed in QEMU tree:
We disable make check due to issues with some of the tests.
This rebase is based on qemu-kvm-7.1.0-7.el9
This rebase is based on qemu-kvm-7.2.0-14.el9
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
@ -66,6 +66,16 @@ Rebase changes (7.2.0):
- Fix SRPM name generation to work on Fedora 37
- Switch back to system meson
Rebase changes (8.0.0-rc1):
- use enable-dtrace-backands instead of enable-dtrace-backend
- Removed qemu virtiofsd bits
Rebase changes (8.0.0-rc2):
- test/check-block.sh removed (upstream)
Rebase changes (8.0.0-rc3):
- Add new --disable-* options for configure
Merged patches (6.0.0):
- 605758c902 Limit build on Power to qemu-img and qemu-ga only
@ -162,16 +172,18 @@ Merged patches (7.2.0 rc4):
- 8c6834feb6 Remove opengl display device subpackages (C9S MR 124)
- 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Merged patches (8.0.0-rc1):
- 7754f6ba78 Minor packaging fixes
- 401af56187 spec: Disable VDUSE
fix
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
.distro/Makefile | 100 +
.distro/Makefile.common | 41 +
.distro/README.tests | 39 +
.distro/modules-load.conf | 4 +
.distro/qemu-guest-agent.service | 1 -
.distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++
.distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++
.distro/rpminspect.yaml | 6 +-
.distro/scripts/extract_build_cmd.py | 12 +
.distro/scripts/process-patches.sh | 4 +
@ -180,9 +192,8 @@ fix
scripts/qemu-guest-agent/fsfreeze-hook | 2 +-
scripts/systemtap/conf.d/qemu_kvm.conf | 4 +
scripts/systemtap/script.d/qemu_kvm.stp | 1 +
tests/check-block.sh | 2 +
ui/vnc-auth-sasl.c | 2 +-
16 files changed, 4573 insertions(+), 4 deletions(-)
15 files changed, 4784 insertions(+), 4 deletions(-)
create mode 100644 .distro/Makefile
create mode 100644 .distro/Makefile.common
create mode 100644 .distro/README.tests
@ -271,19 +282,6 @@ index 0000000000..c04abf9449
+++ b/scripts/systemtap/script.d/qemu_kvm.stp
@@ -0,0 +1 @@
+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {}
diff --git a/tests/check-block.sh b/tests/check-block.sh
index 5de2c1ba0b..6af743f441 100755
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then
skip "No qemu-system binary available ==> Not running the qemu-iotests."
fi
+exit 0
+
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 47fdae5b21..2a950caa2a 100644
--- a/ui/vnc-auth-sasl.c
@ -298,5 +296,5 @@ index 47fdae5b21..2a950caa2a 100644
if (saslErr != SASL_OK) {
error_setg(errp, "Failed to initialize SASL auth: %s",
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001
From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 7 Dec 2022 03:05:48 -0500
Subject: Enable/disable devices for RHEL
@ -32,6 +32,11 @@ Rebase notes (7.1.0 rc3):
Rebase notes (7.2.0 rc20):
- Removed disabling a15mpcore.c as no longer needed
Rebase notes (8.0.0-rc1):
- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9
- Inlude qemu/error-report.h in hw/display/cirrus_vga.c
- Change virtiofsd dependency version
Merged patches (6.1.0):
- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak
- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI
@ -63,7 +68,7 @@ Merged patches (7.1.0 rc0):
hw/arm/meson.build | 2 +-
hw/block/fdc.c | 10 ++
hw/cpu/meson.build | 3 +-
hw/display/cirrus_vga.c | 5 +-
hw/display/cirrus_vga.c | 7 +-
hw/ide/piix.c | 5 +-
hw/input/pckbd.c | 2 +
hw/net/e1000.c | 2 +
@ -73,7 +78,7 @@ Merged patches (7.1.0 rc0):
target/ppc/cpu-models.c | 9 ++
target/s390x/cpu_models_sysemu.c | 3 +
target/s390x/kvm/kvm.c | 8 ++
19 files changed, 283 insertions(+), 13 deletions(-)
19 files changed, 285 insertions(+), 13 deletions(-)
create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
create mode 100644 configs/devices/rh-virtio.mak
@ -212,7 +217,7 @@ index 0000000000..69a799adbd
+CONFIG_VHOST_USER_FS=y
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
new file mode 100644
index 0000000000..10cb0a14e0
index 0000000000..668b2d0e18
--- /dev/null
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -0,0 +1,109 @@
@ -226,7 +231,7 @@ index 0000000000..10cb0a14e0
+CONFIG_ACPI_SMBUS=y
+CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_X86=y
+CONFIG_ACPI_X86_ICH=y
+CONFIG_ACPI_ICH9=y
+CONFIG_AHCI=y
+CONFIG_APIC=y
+CONFIG_APM=y
@ -326,10 +331,10 @@ index 0000000000..10cb0a14e0
+CONFIG_VHOST_USER_VSOCK=y
+CONFIG_VHOST_USER_FS=y
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 92f9f6e000..c5e94c997c 100644
index b545ba0e4f..a41a16cba7 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
@ -339,7 +344,7 @@ index 92f9f6e000..c5e94c997c 100644
arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c'))
arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 64ae4a6899..9b8e782c19 100644
index d7cc4d3ec1..12d0a60905 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -49,6 +49,8 @@
@ -367,7 +372,7 @@ index 64ae4a6899..9b8e782c19 100644
error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'");
return;
diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build
index 9e52fee9e7..87c209a754 100644
index e37490074f..4431e3731c 100644
--- a/hw/cpu/meson.build
+++ b/hw/cpu/meson.build
@@ -1,4 +1,5 @@
@ -375,13 +380,29 @@ index 9e52fee9e7..87c209a754 100644
+#softmmu_ss.add(files('core.c', 'cluster.c'))
+softmmu_ss.add(files('core.c'))
specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 6e8c747c46..1948ebee8e 100644
index b80f98b6c4..cbde6a8f15 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
@@ -36,6 +36,7 @@
#include "qemu/module.h"
#include "qemu/units.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "sysemu/reset.h"
#include "qapi/error.h"
#include "trace.h"
@@ -47,6 +48,7 @@
#include "qom/object.h"
#include "ui/console.h"
+
/*
* TODO:
* - destination write mask support not complete (bits 5..7)
@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
int16_t device_id = pc->device_id;
@ -394,10 +415,10 @@ index 6e8c747c46..1948ebee8e 100644
* Also accept 8 MB/16 MB for backward compatibility.
*/
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 267dbf37db..87fcda4062 100644
index 41d60921e3..a4af45b4e8 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
@ -407,7 +428,7 @@ index 267dbf37db..87fcda4062 100644
}
static const TypeInfo piix3_ide_info = {
@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->hotpluggable = false;
@ -430,10 +451,10 @@ index b92b63bedc..3b6235dde6 100644
static const TypeInfo i8042_info = {
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index e26e0a64c1..41492fae79 100644
index 23d660619f..b75c9aa799 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = {
@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -441,7 +462,7 @@ index e26e0a64c1..41492fae79 100644
{
.name = "e1000-82544gc",
.device_id = E1000_DEV_ID_82544GC_COPPER,
@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = {
@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -467,7 +488,7 @@ index 8a4861f45a..fcb5dfe792 100644
DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 793df42e21..cd3c305471 100644
index 599dc24f0d..905a994c3a 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade
@ -480,10 +501,10 @@ index 793df42e21..cd3c305471 100644
endif
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 9a2cef7d05..a528ff9a3d 100644
index df0c45e523..c154a4dcf2 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
@ -491,7 +512,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj)
@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
@ -499,7 +520,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
@ -507,7 +528,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj)
@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
@ -515,7 +536,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_a15_initfn(Object *obj)
{
@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj)
@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
@ -523,7 +544,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
@ -531,7 +552,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#ifndef TARGET_AARCH64
/*
@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj)
@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
@ -539,7 +560,7 @@ index 9a2cef7d05..a528ff9a3d 100644
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
@ -549,7 +570,7 @@ index 9a2cef7d05..a528ff9a3d 100644
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
@ -620,7 +641,7 @@ index 912b037c63..cd3ff700ac 100644
{ NULL, NULL }
};
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index d8a141a023..d086b1c39c 100644
index 63981bf36b..87a4480c05 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model,
@ -653,5 +674,5 @@ index 3ac7ec9acf..97da1a6424 100644
prop.ibc = s390_ibc_from_cpu_model(model);
/* configure cpu features indicated via STFL(e) */
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001
From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 11 Jan 2019 09:54:45 +0100
Subject: Machine type related general changes
@ -46,28 +46,33 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type
- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors
- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors
---
hw/acpi/piix4.c | 2 +-
hw/arm/virt.c | 2 +-
hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++
hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++
hw/display/vga-isa.c | 2 +-
hw/i386/pc_piix.c | 2 +
hw/i386/pc_q35.c | 2 +
hw/net/rtl8139.c | 4 +-
hw/smbios/smbios.c | 46 +++++++-
hw/smbios/smbios.c | 46 ++++++-
hw/timer/i8254_common.c | 2 +-
hw/usb/hcd-xhci-pci.c | 59 +++++++---
hw/usb/hcd-xhci-pci.c | 59 ++++++---
hw/usb/hcd-xhci-pci.h | 1 +
include/hw/boards.h | 31 +++++
include/hw/firmware/smbios.h | 5 +-
include/hw/i386/pc.h | 3 +
14 files changed, 360 insertions(+), 23 deletions(-)
14 files changed, 367 insertions(+), 23 deletions(-)
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 0a81f1ad93..dbfb362a8f 100644
index 63d2113b86..a24b9aac92 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -248,7 +248,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
static const VMStateDescription vmstate_acpi = {
.name = "piix4_pm",
.version_id = 3,
@ -77,25 +82,25 @@ index 0a81f1ad93..dbfb362a8f 100644
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState),
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b871350856..d633300fdc 100644
index ac626b3bef..4a6e89c7bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms)
@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms)
smbios_set_defaults("QEMU", product,
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
- true, SMBIOS_ENTRY_POINT_TYPE_64);
+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64);
smbios_get_tables(MACHINE(vms), NULL, 0,
&smbios_tables, &smbios_tables_len,
/* build the array of physical mem area from base_memmap */
mem_array.address = vms->memmap[VIRT_MEM].base;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 8d34caa31d..9edec1ca05 100644
index cd13b8b0a3..5aa567fad3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -40,6 +40,228 @@
#include "hw/virtio/virtio-pci.h"
#include "qom/object_interfaces.h"
@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = {
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
+/*
+ * RHEL only: machine types for previous major releases are deprecated
@ -111,6 +116,13 @@ index 8d34caa31d..9edec1ca05 100644
+ { "arm-gicv3-common", "force-8-bit-prio", "on" },
+ /* hw_compat_rhel_9_1 from hw_compat_7_0 */
+ { "nvme-ns", "eui64-default", "on"},
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
+ { "virtio-device", "queue_reset", "false" },
+ /* hw_compat_rhel_9_1 bz 2155749 */
+ { "virtio-rng-pci", "vectors", "0" },
+ /* hw_compat_rhel_9_1 bz 2162569 */
+ { "virtio-rng-pci-transitional", "vectors", "0" },
+ { "virtio-rng-pci-non-transitional", "vectors", "0" },
+};
+const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
+
@ -321,7 +333,7 @@ index 8d34caa31d..9edec1ca05 100644
+
GlobalProperty hw_compat_7_1[] = {
{ "virtio-device", "queue_reset", "false" },
};
{ "virtio-rng-pci", "vectors", "0" },
diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c
index 2a5437d803..0db2c2b2a1 100644
--- a/hw/display/vga-isa.c
@ -336,10 +348,10 @@ index 2a5437d803..0db2c2b2a1 100644
};
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0ad0ed1603..0985ff67d2 100644
index 30eedd62a3..14a794081e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine,
@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
@ -349,10 +361,10 @@ index 0ad0ed1603..0985ff67d2 100644
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a496bd6e74..ea582254e3 100644
index 797ba347fd..dc0ba5f9e7 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine)
@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
@ -362,7 +374,7 @@ index a496bd6e74..ea582254e3 100644
}
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 700b1b66b6..13693aeb4f 100644
index 5a5aaf868d..3d473d5869 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque)
@ -385,10 +397,10 @@ index 700b1b66b6..13693aeb4f 100644
VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State),
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index b4243de735..c5ad69237e 100644
index d2007e70fb..319eae9e9d 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -57,6 +57,9 @@ static bool smbios_legacy = true;
@@ -58,6 +58,9 @@ static bool smbios_legacy = true;
static bool smbios_uuid_encoded = true;
/* end: legacy structures & constants for <= 2.0 machines */
@ -398,7 +410,7 @@ index b4243de735..c5ad69237e 100644
uint8_t *smbios_tables;
size_t smbios_tables_len;
@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void)
@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void)
static void smbios_build_type_2_table(void)
{
@ -407,7 +419,7 @@ index b4243de735..c5ad69237e 100644
SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer);
SMBIOS_TABLE_SET_STR(2, product_str, type2.product);
@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
void smbios_set_defaults(const char *manufacturer, const char *product,
const char *version, bool legacy_mode,
@ -419,7 +431,7 @@ index b4243de735..c5ad69237e 100644
{
smbios_have_defaults = true;
smbios_legacy = legacy_mode;
@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
g_free(smbios_entries);
}
@ -579,10 +591,10 @@ index 643d4643e4..529bad9366 100644
dc->vmsd = &vmstate_xhci_pci;
set_bit(DEVICE_CATEGORY_USB, dc->categories);
diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h
index c193f79443..086a1feb1e 100644
index 08f70ce97c..1be7527c1b 100644
--- a/hw/usb/hcd-xhci-pci.h
+++ b/hw/usb/hcd-xhci-pci.h
@@ -39,6 +39,7 @@ typedef struct XHCIPciState {
@@ -40,6 +40,7 @@ typedef struct XHCIPciState {
XHCIState xhci;
OnOffAuto msi;
OnOffAuto msix;
@ -591,10 +603,10 @@ index c193f79443..086a1feb1e 100644
#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 90f1dd3aeb..2209d4e416 100644
index 6fbbfd56c8..c5a965d27f 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -454,4 +454,35 @@ extern const size_t hw_compat_2_2_len;
@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
@ -647,10 +659,10 @@ index 7f3259a630..d24b3ccd32 100644
void smbios_get_tables(MachineState *ms,
const struct smbios_phys_mem_area *mem_array,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c95333514e..3754eaa97d 100644
index 8206d5405a..908a275736 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -112,6 +112,9 @@ struct PCMachineClass {
@@ -111,6 +111,9 @@ struct PCMachineClass {
bool smbios_defaults;
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
@ -661,5 +673,5 @@ index c95333514e..3754eaa97d 100644
/* RAM / address space compat: */
bool gigabyte_align;
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001
From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 12:53:31 +0200
Subject: Add aarch64 machine types
@ -26,6 +26,9 @@ Rebase notes (7.1.0 rc3):
Rebase notes (7.2.0 rc0):
- Disabled cortex-a35
Rebase notes (8.0.0-rc1):
- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c
Merged patches (6.2.0):
- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type
- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type
@ -49,23 +52,27 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type
- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props
---
hw/arm/virt.c | 237 ++++++++++++++++++++++++++++++++-
hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 8 ++
target/arm/arm-qmp-cmds.c | 2 +
target/arm/cpu-qom.h | 1 +
target/arm/cpu.c | 5 +
target/arm/cpu.h | 2 +
target/arm/cpu64.c | 16 ++-
target/arm/cpu_tcg.c | 12 +-
target/arm/helper.c | 2 +
tests/qtest/arm-cpu-features.c | 6 +
9 files changed, 277 insertions(+), 12 deletions(-)
9 files changed, 289 insertions(+), 14 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d633300fdc..dfcab40a73 100644
index 4a6e89c7bc..1ae1654be5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -80,6 +80,7 @@
@@ -81,6 +81,7 @@
#include "hw/char/pl011.h"
#include "qemu/guest-random.h"
@ -73,7 +80,7 @@ index d633300fdc..dfcab40a73 100644
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -106,7 +107,48 @@
@@ -107,7 +108,48 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
@ -123,7 +130,7 @@ index d633300fdc..dfcab40a73 100644
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -197,15 +239,19 @@ static const int a15irqmap[] = {
@@ -204,16 +246,20 @@ static const int a15irqmap[] = {
};
static const char *valid_cpus[] = {
@ -132,6 +139,7 @@ index d633300fdc..dfcab40a73 100644
ARM_CPU_TYPE_NAME("cortex-a15"),
ARM_CPU_TYPE_NAME("cortex-a35"),
ARM_CPU_TYPE_NAME("cortex-a53"),
ARM_CPU_TYPE_NAME("cortex-a55"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("cortex-a57"),
+#if 0 /* Disabled for Red Hat Enterprise Linux */
@ -143,7 +151,7 @@ index d633300fdc..dfcab40a73 100644
ARM_CPU_TYPE_NAME("host"),
ARM_CPU_TYPE_NAME("max"),
};
@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine)
@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine)
qemu_add_machine_init_done_notifier(&vms->machine_done);
}
@ -151,7 +159,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_secure(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
vms->virt = value;
}
@ -159,7 +167,25 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_highmem(Object *obj, Error **errp)
{
@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
-
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_compact_highmem(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
vms->highmem_mmio = value;
}
-
+#endif /* disabled for RHEL */
static bool virt_get_its(Object *obj, Error **errp)
{
@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
vms->its = value;
}
@ -167,7 +193,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_dtb_randomness(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
vms->dtb_randomness = value;
}
@ -175,7 +201,7 @@ index d633300fdc..dfcab40a73 100644
static char *virt_get_oem_id(Object *obj, Error **errp)
{
@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
vms->ras = value;
}
@ -183,7 +209,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_mte(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
vms->mte = value;
}
@ -191,7 +217,7 @@ index d633300fdc..dfcab40a73 100644
static char *virt_get_gic_version(Object *obj, Error **errp)
{
@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
return fixed_ipa ? 0 : requested_pa_size;
}
@ -199,7 +225,7 @@ index d633300fdc..dfcab40a73 100644
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc)
@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc)
vmc->no_pmu = true;
}
DEFINE_VIRT_MACHINE(2, 6)
@ -312,6 +338,7 @@ index d633300fdc..dfcab40a73 100644
+
+ /* High memory is enabled by default */
+ vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
+ vms->gic_version = VIRT_GIC_VERSION_NOSEL;
+
+ vms->highmem_ecam = !vmc->no_highmem_ecam;
@ -374,22 +401,31 @@ index d633300fdc..dfcab40a73 100644
+}
+type_init(rhel_machine_init);
+
+static void rhel920_virt_options(MachineClass *mc)
+{
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+
+static void rhel900_virt_options(MachineClass *mc)
+{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ rhel920_virt_options(mc);
+
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
+
+ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
+ vmc->no_tcg_lpa2 = true;
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
+ vmc->no_highmem_compact = true;
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
+DEFINE_RHEL_MACHINE(9, 0, 0)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 6ec479ca2b..22b54ec510 100644
index e1ddbea96b..81c2363a40 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -180,9 +180,17 @@ struct VirtMachineState {
@@ -187,9 +187,17 @@ struct VirtMachineState {
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
@ -407,8 +443,28 @@ index 6ec479ca2b..22b54ec510 100644
void virt_acpi_setup(VirtMachineState *vms);
bool virt_is_acpi_enabled(VirtMachineState *vms);
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index c8fa524002..3aa089abf3 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 64c44cef2d..82e97249bc 100644
index 514c22ced9..f789173451 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo {
@ -420,10 +476,10 @@ index 64c44cef2d..82e97249bc 100644
void arm_cpu_register(const ARMCPUInfo *info);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 38d066c294..a845814bfb 100644
index 5182ed0c91..6740a8b940 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object *obj)
@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@ -438,7 +494,7 @@ index 38d066c294..a845814bfb 100644
void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9aeed3c848..f9f504d89e 100644
index c097cae988..829d4a2328 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -34,6 +34,8 @@
@ -451,10 +507,10 @@ index 9aeed3c848..f9f504d89e 100644
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 3d74f134f5..4b330a52b5 100644
index 0fb07cc7b6..47459627fb 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -36,6 +36,7 @@
@@ -31,6 +31,7 @@
#include "hw/qdev-properties.h"
#include "internals.h"
@ -462,7 +518,7 @@ index 3d74f134f5..4b330a52b5 100644
static void aarch64_a35_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj)
@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj)
/* These values are the same with A53/A57/A72. */
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
@ -470,7 +526,7 @@ index 3d74f134f5..4b330a52b5 100644
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
{
@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj)
@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj)
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
@ -478,15 +534,15 @@ index 3d74f134f5..4b330a52b5 100644
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
/* From D5.1 AArch64 PMU register summary */
cpu->isar.reset_pmcr_el0 = 0x410c3000;
@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
define_neoverse_n1_cp_reginfo(cpu);
}
+#endif /* disabled for RHEL */
static void aarch64_host_initfn(Object *obj)
{
@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj)
@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj)
}
static const ARMCPUInfo aarch64_cpus[] = {
@ -498,6 +554,7 @@ index 3d74f134f5..4b330a52b5 100644
+ .deprecation_note = RHEL_CPU_DEPRECATION },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
{ .name = "cortex-a55", .initfn = aarch64_a55_initfn },
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
{ .name = "cortex-a76", .initfn = aarch64_a76_initfn },
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
@ -506,7 +563,7 @@ index 3d74f134f5..4b330a52b5 100644
{ .name = "max", .initfn = aarch64_max_initfn },
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
{ .name = "host", .initfn = aarch64_host_initfn },
@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj)
@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@ -521,10 +578,10 @@ index 3d74f134f5..4b330a52b5 100644
void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index a528ff9a3d..053f70e399 100644
index c154a4dcf2..f29425b656 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
}
#endif /* !CONFIG_USER_ONLY */
@ -536,7 +593,7 @@ index a528ff9a3d..053f70e399 100644
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj)
@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
@ -544,7 +601,7 @@ index a528ff9a3d..053f70e399 100644
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
@ -552,7 +609,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj)
@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
@ -560,7 +617,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_a15_initfn(Object *obj)
{
@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj)
@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
@ -568,7 +625,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
@ -576,7 +633,7 @@ index a528ff9a3d..053f70e399 100644
#ifndef TARGET_AARCH64
/*
@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj)
@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
@ -584,7 +641,7 @@ index a528ff9a3d..053f70e399 100644
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
@ -594,7 +651,7 @@ index a528ff9a3d..053f70e399 100644
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
@ -602,36 +659,16 @@ index a528ff9a3d..053f70e399 100644
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void)
@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void)
type_init(arm_tcg_cpu_register_types)
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
+#endif /* disabled for RHEL */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d8c8223ec3..ad9d235773 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8476,6 +8476,7 @@ void arm_cpu_list(void)
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 5a14527386..a3579fc303 100644
index 1cb08138ad..834497dfec 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data)
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
/* Test expected feature presence/absence for some cpu types */
@ -642,7 +679,7 @@ index 5a14527386..a3579fc303 100644
/* Enabling and disabling pmu should always work. */
assert_has_feature_enabled(qts, "max", "pmu");
@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
@ -650,7 +687,7 @@ index 5a14527386..a3579fc303 100644
assert_has_feature_enabled(qts, "a64fx", "pmu");
assert_has_feature_enabled(qts, "a64fx", "aarch64");
/*
@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data)
"{ 'sve384': true }");
assert_error(qts, "a64fx", "cannot enable sve640",
"{ 'sve640': true }");
@ -658,7 +695,7 @@ index 5a14527386..a3579fc303 100644
sve_tests_default(qts, "max");
pauth_tests_default(qts, "max");
@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
QDict *resp;
char *error;
@ -671,5 +708,5 @@ index 5a14527386..a3579fc303 100644
assert_has_feature_enabled(qts, "host", "aarch64");
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001
From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:27:13 +0200
Subject: Add ppc64 machine types
@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0):
8 files changed, 314 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 66b414d2e9..499eb49253 100644
index 4921198b9d..e24b3e22e3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
pef_kvm_reset(machine->cgs, &error_fatal);
spapr_caps_apply(spapr);
@ -47,7 +47,7 @@ index 66b414d2e9..499eb49253 100644
first_ppc_cpu = POWERPC_CPU(first_cpu);
if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
spapr->host_serial = g_strdup(value);
}
@ -68,7 +68,7 @@ index 66b414d2e9..499eb49253 100644
static void spapr_instance_init(Object *obj)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj)
@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj)
spapr_get_host_serial, spapr_set_host_serial);
object_property_set_description(obj, "host-serial",
"Host serial number to advertise in guest device tree");
@ -81,7 +81,7 @@ index 66b414d2e9..499eb49253 100644
}
static void spapr_machine_finalizefn(Object *obj)
@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
vmc->client_architecture_support = spapr_vof_client_architecture_support;
vmc->quiesce = spapr_vof_quiesce;
vmc->setprop = spapr_vof_setprop;
@ -89,15 +89,15 @@ index 66b414d2e9..499eb49253 100644
}
static const TypeInfo spapr_machine_info = {
@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
} \
type_init(spapr_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/*
* pseries-7.2
* pseries-8.0
*/
@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
}
DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
@ -105,7 +105,7 @@ index 66b414d2e9..499eb49253 100644
/*
* pseries-4.0
@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
*nv2atsd = 0;
return true;
}
@ -114,7 +114,7 @@ index 66b414d2e9..499eb49253 100644
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644
qdev_unrealize(DEVICE(cpu));
return false;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 04a95669ab..d5f4cf5e03 100644
index 5c8aabd444..04489d5808 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -154,6 +154,7 @@ struct SpaprMachineClass {
@@ -155,6 +155,7 @@ struct SpaprMachineClass {
bool pre_5_2_numa_associativity;
bool pre_6_2_numa_affinity;
@ -386,7 +386,7 @@ index 04a95669ab..d5f4cf5e03 100644
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
@@ -256,6 +257,9 @@ struct SpaprMachineState {
@@ -257,6 +258,9 @@ struct SpaprMachineState {
/* Set by -boot */
char *boot_device;
@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644
{ "405cr", "405crc" },
{ "405gp", "405gpd" },
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 81d4263a07..508fbed90b 100644
index 557d736dab..6646ec1c27 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
/* Compatibility modes */
#if defined(TARGET_PPC64)
@ -446,10 +446,10 @@ index 81d4263a07..508fbed90b 100644
uint32_t min_compat_pvr, uint32_t max_compat_pvr);
bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 7c25348b7b..83671c955f 100644
index 78f6fc50cd..68d06c3f8f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv;
@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
static int cap_fwnmi;
static int cap_rpt_invalidate;
@ -457,7 +457,7 @@ index 7c25348b7b..83671c955f 100644
static uint32_t debug_inst_opcode;
@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
kvmppc_get_cpu_characteristics(s);
cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@ -465,7 +465,7 @@ index 7c25348b7b..83671c955f 100644
cap_large_decr = kvmppc_get_dec_bits();
cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
/*
@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
return cap_rpt_invalidate;
}
@ -482,7 +482,7 @@ index 7c25348b7b..83671c955f 100644
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
uint32_t host_pvr = mfpvr();
@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
void kvm_arch_accel_class_init(ObjectClass *oc)
{
}
@ -502,10 +502,10 @@ index 7c25348b7b..83671c955f 100644
+ }
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index ee9325bf9a..20dbb95989 100644
index 5fd9753953..b5ebfe2be0 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl);
@ -513,7 +513,7 @@ index ee9325bf9a..20dbb95989 100644
#ifndef CONFIG_USER_ONLY
bool kvmppc_spapr_use_multitce(void);
int kvmppc_spapr_enable_inkernel_multitce(void);
@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void);
@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
int kvmppc_has_cap_rpt_invalidate(void);
int kvmppc_enable_hwrng(void);
@ -522,7 +522,7 @@ index ee9325bf9a..20dbb95989 100644
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
return false;
}
@ -540,5 +540,5 @@ index ee9325bf9a..20dbb95989 100644
{
return -1;
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001
From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:47:32 +0200
Subject: Add s390x machine types
@ -30,45 +30,72 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type
- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0
---
hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++
target/s390x/cpu_models.c | 11 ++++
hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++
target/s390x/cpu_models.c | 11 +++
target/s390x/cpu_models.h | 2 +
target/s390x/cpu_models_sysemu.c | 2 +
4 files changed, 123 insertions(+)
4 files changed, 158 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2e64ffab45..8d5221fbb1 100644
index 503f212a31..dcd3b966b0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -823,6 +823,7 @@ bool css_migration_enabled(void)
@@ -826,6 +826,7 @@ bool css_migration_enabled(void)
} \
type_init(ccw_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void ccw_machine_7_2_instance_options(MachineState *machine)
static void ccw_machine_8_0_instance_options(MachineState *machine)
{
}
@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_CCW_MACHINE(2_4, "2.4", false);
+#endif
+
+
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
+
+static void ccw_machine_rhel900_instance_options(MachineState *machine)
+{
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
+ ccw_machine_rhel920_instance_options(machine);
+
+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
+}
+
+static void ccw_machine_rhel900_class_options(MachineClass *mc)
+{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ ccw_machine_rhel920_class_options(mc);
+
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
+ s390mc->max_threads = S390_MAX_CPUS;
+}
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
+
+static void ccw_machine_rhel860_instance_options(MachineState *machine)
+{
@ -78,7 +105,14 @@ index 2e64ffab45..8d5221fbb1 100644
+
+static void ccw_machine_rhel860_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "on", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
+ };
+
+ ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* All RHEL machines for prior major releases are deprecated */
+ mc->deprecation_reason = rhel_old_machine_deprecation;
@ -102,8 +136,14 @@ index 2e64ffab45..8d5221fbb1 100644
+
+static void ccw_machine_rhel850_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ ccw_machine_rhel860_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->smp_props.prefer_sockets = true;
+}
+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
@ -164,10 +204,10 @@ index 2e64ffab45..8d5221fbb1 100644
static void ccw_machine_register_types(void)
{
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index c3a4f80633..739770dc15 100644
index 457b5cb10c..ff6b9463cb 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -45,6 +45,9 @@
@@ -46,6 +46,9 @@
* of a following release have been a superset of the previous release. With
* generation 15 one base feature and one optional feature have been deprecated.
*/
@ -177,7 +217,7 @@ index c3a4f80633..739770dc15 100644
static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"),
CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"),
@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data)
{
S390CPUClass *xcc = S390_CPU_CLASS(oc);
@ -222,7 +262,7 @@ index fb1adc8b21..d76745afa9 100644
/* CPU model based on a CPU definition */
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index d086b1c39c..1b9cc66405 100644
index 87a4480c05..28c1b0486c 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque)
@ -242,5 +282,5 @@ index d086b1c39c..1b9cc66405 100644
if (cpu_list_data->model) {
Object *obj;
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001
From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:10:31 +0200
Subject: Add x86_64 machine types
@ -13,6 +13,9 @@ Rebase notes (6.1.0):
Rebase notes (7.0.0):
- Reset alias for all machine-types except latest one
Rebase notes (8.0.0-rc1):
- remove legacy_no_rng_seed usage (removed upstream)
Merged patches (6.1.0):
- 59c284ad3b x86: Add x86 rhel8.5 machine types
- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default
@ -39,24 +42,26 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- f33ca8aed4 x86: rhel 9.2.0 machine type
---
hw/i386/pc.c | 147 ++++++++++++++++++++++-
hw/i386/pc_piix.c | 86 +++++++++++++-
hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++-
hw/s390x/s390-virtio-ccw.c | 1 +
hw/i386/pc.c | 147 +++++++++++++++++++++-
hw/i386/pc_piix.c | 86 ++++++++++++-
hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++-
include/hw/boards.h | 2 +
include/hw/i386/pc.h | 27 +++++
include/hw/i386/pc.h | 27 ++++
target/i386/cpu.c | 21 ++++
target/i386/kvm/kvm-cpu.c | 1 +
target/i386/kvm/kvm.c | 4 +
tests/qtest/pvpanic-test.c | 5 +-
10 files changed, 521 insertions(+), 7 deletions(-)
9 files changed, 538 insertions(+), 7 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 546b703cb4..c7b1350e64 100644
index 1489abf010..8abb1f872e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = {
@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = {
};
const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
@ -206,7 +211,7 @@ index 546b703cb4..c7b1350e64 100644
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
assert(!mc->get_hotplug_handler);
@ -214,7 +219,7 @@ index 546b703cb4..c7b1350e64 100644
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->block_default_type = IF_IDE;
@ -225,10 +230,10 @@ index 546b703cb4..c7b1350e64 100644
mc->wakeup = pc_machine_wakeup;
hc->pre_plug = pc_machine_device_pre_plug_cb;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0985ff67d2..173a1fd10b 100644
index 14a794081e..3e330fd36f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -53,6 +53,7 @@
@@ -54,6 +54,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/xen.h"
@ -236,7 +241,7 @@ index 0985ff67d2..173a1fd10b 100644
#ifdef CONFIG_XEN
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine,
@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine,
if (pcmc->smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* These values are guest ABI, do not change */
@ -247,7 +252,7 @@ index 0985ff67d2..173a1fd10b 100644
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine,
@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine,
* hw_compat_*, pc_compat_*, or * pc_*_machine_options().
*/
@ -255,7 +260,7 @@ index 0985ff67d2..173a1fd10b 100644
static void pc_compat_2_3_fn(MachineState *machine)
{
X86MachineState *x86ms = X86_MACHINE(machine);
@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m)
@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m)
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
xenfv_3_1_machine_options);
#endif
@ -304,7 +309,7 @@ index 0985ff67d2..173a1fd10b 100644
+ pcmc->kvmclock_create_always = false;
+ /* From pc_i440fx_5_1_machine_options() */
+ pcmc->pci_root_uid = 1;
+ pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_1,
+ hw_compat_rhel_9_1_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
@ -340,10 +345,10 @@ index 0985ff67d2..173a1fd10b 100644
+DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760,
+ pc_machine_rhel760_options);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ea582254e3..97c3630021 100644
index dc0ba5f9e7..98601bb76f 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine)
@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine)
if (pcmc->smbios_defaults) {
/* These values are guest ABI, do not change */
@ -354,7 +359,7 @@ index ea582254e3..97c3630021 100644
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine)
@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine)
DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
@ -362,7 +367,7 @@ index ea582254e3..97c3630021 100644
static void pc_q35_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL,
pc_q35_2_4_machine_options);
@ -391,6 +396,23 @@ index ea582254e3..97c3630021 100644
+ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
+}
+
+static void pc_q35_init_rhel920(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel920_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.2.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
+ pc_q35_machine_rhel920_options);
+
+static void pc_q35_init_rhel900(MachineState *machine)
+{
+ pc_q35_init(machine);
@ -399,11 +421,12 @@ index ea582254e3..97c3630021 100644
+static void pc_q35_machine_rhel900_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ pc_q35_machine_rhel920_options(m);
+ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.0.0";
+ pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_1,
+ hw_compat_rhel_9_1_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
@ -595,23 +618,11 @@ index ea582254e3..97c3630021 100644
+
+DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760,
+ pc_q35_machine_rhel760_options);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 8d5221fbb1..ba640e3d9e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
static void ccw_machine_rhel860_class_options(MachineClass *mc)
{
ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
/* All RHEL machines for prior major releases are deprecated */
mc->deprecation_reason = rhel_old_machine_deprecation;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2209d4e416..fd75f551b1 100644
index c5a965d27f..5e7446ee40 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -266,6 +266,8 @@ struct MachineClass {
@@ -268,6 +268,8 @@ struct MachineClass {
strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
bool auto_enable_numa_with_memdev;
@ -621,12 +632,12 @@ index 2209d4e416..fd75f551b1 100644
bool smbus_no_migration_support;
bool nvdimm_supported;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 3754eaa97d..4266fe2fdb 100644
index 908a275736..4376f64a47 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len;
extern GlobalProperty pc_compat_1_4[];
extern const size_t pc_compat_1_4_len;
@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len;
int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
+extern GlobalProperty pc_rhel_compat[];
+extern const size_t pc_rhel_compat_len;
@ -659,10 +670,10 @@ index 3754eaa97d..4266fe2fdb 100644
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
{ \
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 22b681ca37..f7c526cbe6 100644
index 6576287e5b..0ef2bf1b93 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = {
@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = {
* PT in VMX operation
*/
@ -676,7 +687,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 0xd,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "phenom",
@ -684,7 +695,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 16,
@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "core2duo",
@ -692,7 +703,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm64",
@ -700,7 +711,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 0xd,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "qemu32",
@ -708,7 +719,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 4,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm32",
@ -716,7 +727,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "coreduo",
@ -724,7 +735,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "486",
@ -732,7 +743,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 4,
@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium",
@ -740,7 +751,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 5,
@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium2",
@ -748,7 +759,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 2,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium3",
@ -756,7 +767,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 3,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "athlon",
@ -764,7 +775,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 2,
.vendor = CPUID_VENDOR_AMD,
.family = 6,
@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "n270",
@ -772,7 +783,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Conroe",
@ -780,7 +791,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Penryn",
@ -788,7 +799,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G1",
@ -796,7 +807,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G2",
@ -804,7 +815,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G3",
@ -825,10 +836,10 @@ index 7237378a7d..7b8a3d5af0 100644
};
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a213209379..81526a1575 100644
index de531842f6..8d82304609 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu)
struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
int ret, i;
uint64_t mtrr_top_bits;
@ -836,7 +847,7 @@ index a213209379..81526a1575 100644
kvm_msr_buf_reset(cpu);
@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu)
break;
case MSR_KVM_ASYNC_PF_EN:
env->async_pf_en_msr = msrs[i].data;
@ -847,7 +858,7 @@ index a213209379..81526a1575 100644
case MSR_KVM_ASYNC_PF_INT:
env->async_pf_int_msr = msrs[i].data;
diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c
index bc7b7dfc39..96e6dee3a1 100644
index 78f1cf8186..ac954c9b06 100644
--- a/tests/qtest/pvpanic-test.c
+++ b/tests/qtest/pvpanic-test.c
@@ -17,7 +17,7 @@ static void test_panic_nopause(void)
@ -870,5 +881,5 @@ index bc7b7dfc39..96e6dee3a1 100644
val = qtest_inb(qts, 0x505);
g_assert_cmpuint(val, ==, 3);
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001
From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 2 Sep 2020 09:39:41 +0200
Subject: Enable make check
@ -27,28 +27,37 @@ Rebase changes (7.0.0):
Rebase changes (7.1.0 rc0):
- Disable bcm2835-dma-test (added upstream)
Rebase changes (8.0.0-rc1):
- Removed chunks for disabling bios-table-test (protected upstream)
Rebase change (8.0.0-rc2):
- Disable new qemu-iotests execution
- Revert change in tco qtest (blocking test run)
Merged patches (6.1.0):
- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again
Merged patches (7.1.0 rc0):
- 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57
---
.distro/qemu-kvm.spec.template | 5 ++---
.distro/qemu-kvm.spec.template | 4 ++--
tests/avocado/replay_kernel.py | 2 +-
tests/avocado/reverse_debugging.py | 2 +-
tests/avocado/tcg_plugins.py | 6 +++---
tests/avocado/tcg_plugins.py | 6 ++---
tests/qemu-iotests/meson.build | 34 ++++++++++++++---------------
tests/qtest/fuzz-e1000e-test.c | 2 +-
tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
tests/qtest/intel-hda-test.c | 2 +-
tests/qtest/libqos/meson.build | 2 +-
tests/qtest/lpc-ich9-test.c | 2 +-
tests/qtest/meson.build | 7 +------
tests/qtest/meson.build | 2 --
tests/qtest/tco-test.c | 2 +-
tests/qtest/usb-hcd-xhci-test.c | 4 ++++
tests/qtest/virtio-net-failover.c | 1 +
12 files changed, 18 insertions(+), 19 deletions(-)
14 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 00a26e4a0c..fe5ecf238a 100644
index f13456e1ec..2fee270a42 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -147,7 +147,7 @@ def test_aarch64_virt(self):
@ -61,10 +70,10 @@ index 00a26e4a0c..fe5ecf238a 100644
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index d2921e70c3..66d185ed42 100644
index 680c314cfc..71eccb8fb6 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -198,7 +198,7 @@ def test_aarch64_virt(self):
@@ -206,7 +206,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -104,6 +113,49 @@ index 642d2e49e3..93b3afd823 100644
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build
index 9735071a29..32002335f4 100644
--- a/tests/qemu-iotests/meson.build
+++ b/tests/qemu-iotests/meson.build
@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats
check: true,
)
- foreach item: rc.stdout().strip().split()
- args = [qemu_iotests_check_cmd,
- '-tap', '-' + format, item,
- '--source-dir', meson.current_source_dir(),
- '--build-dir', meson.current_build_dir()]
- # Some individual tests take as long as 45 seconds
- # Bump the timeout to 3 minutes for some headroom
- # on slow machines to minimize spurious failures
- test('io-' + format + '-' + item,
- python,
- args: args,
- depends: qemu_iotests_binaries,
- env: qemu_iotests_env,
- protocol: 'tap',
- timeout: 180,
- suite: suites)
- endforeach
+# foreach item: rc.stdout().strip().split()
+# args = [qemu_iotests_check_cmd,
+# '-tap', '-' + format, item,
+# '--source-dir', meson.current_source_dir(),
+# '--build-dir', meson.current_build_dir()]
+# # Some individual tests take as long as 45 seconds
+# # Bump the timeout to 3 minutes for some headroom
+# # on slow machines to minimize spurious failures
+# test('io-' + format + '-' + item,
+# python,
+# args: args,
+# depends: qemu_iotests_binaries,
+# env: qemu_iotests_env,
+# protocol: 'tap',
+# timeout: 180,
+# suite: suites)
+# endforeach
endforeach
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
index 5052883fb6..b5286f4b12 100644
--- a/tests/qtest/fuzz-e1000e-test.c
@ -144,10 +196,10 @@ index d4a8db6fd6..1a796ec15a 100644
qtest_outl(s, 0xcf8, 0x80000804);
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index 32f028872c..1e78a1a055 100644
index cc209a8de5..42a7c529c9 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -43,7 +43,7 @@ libqos_srcs = files(
@@ -44,7 +44,7 @@ libqos_srcs = files(
'virtio-rng.c',
'virtio-scsi.c',
'virtio-serial.c',
@ -170,18 +222,10 @@ index 8ac95b89f7..cd2102555c 100644
qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c07a5b1a5f..9df3f9f8b9 100644
index 85ea4e8d99..893afc8eeb 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -82,7 +82,6 @@ qtests_i386 = \
config_all_devices.has_key('CONFIG_Q35') and \
config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \
slirp.found() ? ['virtio-net-failover'] : []) + \
- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
qtests_pci + \
qtests_cxl + \
['fdc-test',
@@ -96,7 +95,6 @@ qtests_i386 = \
@@ -94,7 +94,6 @@ qtests_i386 = \
'drive_del-test',
'tco-test',
'cpu-plug-test',
@ -189,24 +233,7 @@ index c07a5b1a5f..9df3f9f8b9 100644
'vmgenid-test',
'migration-test',
'test-x86-cpuid-compat',
@@ -209,15 +207,13 @@ qtests_arm = \
# TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
qtests_aarch64 = \
- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \
(config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \
['arm-cpu-features',
'numa-test',
'boot-serial-test',
- 'migration-test',
- 'bcm2835-dma-test']
+ 'migration-test']
qtests_s390x = \
(slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \
@@ -225,7 +221,6 @@ qtests_s390x = \
@@ -223,7 +222,6 @@ qtests_s390x = \
(config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \
['boot-serial-test',
'drive_del-test',
@ -214,6 +241,19 @@ index c07a5b1a5f..9df3f9f8b9 100644
'virtio-ccw-test',
'cpu-plug-test',
'migration-test']
diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c
index 0547d41173..3756ce82d8 100644
--- a/tests/qtest/tco-test.c
+++ b/tests/qtest/tco-test.c
@@ -60,7 +60,7 @@ static void test_init(TestData *d)
QTestState *qs;
qs = qtest_initf("-machine q35 %s %s",
- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "",
+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false",
!d->args ? "" : d->args);
qtest_irq_intercept_in(qs, "ioapic");
diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c
index 10ef9d2a91..3855873050 100644
--- a/tests/qtest/usb-hcd-xhci-test.c
@ -257,5 +297,5 @@ index 4a809590bf..1bf3fa641c 100644
"-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 "
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001
From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 3 Dec 2013 20:05:13 +0100
Subject: vfio: cap number of devices that can be assigned
@ -32,7 +32,7 @@ Signed-off-by: Bandan Das <bsd@redhat.com>
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a..acbc6673ce 100644
index ec9a854361..a779053be3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -48,6 +48,9 @@
@ -77,7 +77,7 @@ index 939dcc3d4a..acbc6673ce 100644
if (!vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = {
@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
no_geforce_quirks, false),
@ -88,7 +88,7 @@ index 939dcc3d4a..acbc6673ce 100644
false),
DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 7c236a52f4..7b7d036a8f 100644
index 177abcc8fb..45235d38ba 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -140,6 +140,7 @@ struct VFIOPCIDevice {
@ -100,5 +100,5 @@ index 7c236a52f4..7b7d036a8f 100644
uint32_t device_id;
uint32_t sub_vendor_id;
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001
From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 4 Dec 2013 18:53:17 +0100
Subject: Add support statement to -help output
@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
1 file changed, 9 insertions(+)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 5115221efe..17188df528 100644
index ea20b23e4c..ad4173138d 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -834,9 +834,17 @@ static void version(void)
@ -51,5 +51,5 @@ index 5115221efe..17188df528 100644
}
--
2.31.1
2.39.1

@ -1,4 +1,4 @@
From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001
From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 8 Jul 2020 08:35:50 +0200
Subject: Use qemu-kvm in documentation instead of qemu-system-<arch>
@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644
.. |I2C| replace:: I\ :sup:`2`\ C
.. |I2S| replace:: I\ :sup:`2`\ S
diff --git a/qemu-options.hx b/qemu-options.hx
index 7f99d15b23..ea02ca3a45 100644
index 59bdf67a2c..52b49f1f6a 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3300,11 +3300,11 @@ SRST
@@ -3296,11 +3296,11 @@ SRST
::
@ -57,5 +57,5 @@ index 7f99d15b23..ea02ca3a45 100644
``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
Establish a vhost-vdpa netdev.
--
2.31.1
2.39.1

@ -1,60 +0,0 @@
From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001
From: David Gibson <dgibson@redhat.com>
Date: Wed, 6 Feb 2019 03:58:56 +0000
Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts
RH-Author: David Gibson <dgibson@redhat.com>
Message-id: <20190206035856.19058-1-dgibson@redhat.com>
Patchwork-id: 84246
O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts
Bugzilla: 1653590
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
RH-Acked-by: Serhii Popovych <spopovyc@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
Most current POWER guests require 64kiB page support, so that's the default
for the cap-hpt-max-pagesize option in qemu which limits available guest
page sizes. We warn if the value is set smaller than that, but don't
outright fail upstream, because we need to allow for the possibility of
guest (and/or host) kernels configured for 4kiB page sizes.
Downstream, however, we simply don't support 4kiB pagesize configured
kernels in guest or host, so we can have qemu simply error out in this
situation.
Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified
it failed immediately with a qemu error
Signed-off-by: David Gibson <dgibson@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
hw/ppc/spapr_caps.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index b4283055c1..59b88aadff 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
uint8_t val, Error **errp)
{
+#if 0 /* disabled for RHEL */
if (val < 12) {
error_setg(errp, "Require at least 4kiB hpt-max-page-size");
return;
} else if (val < 16) {
warn_report("Many guests require at least 64kiB hpt-max-page-size");
}
+#else /* Only page sizes >=64kiB supported for RHEL */
+ if (val < 16) {
+ error_setg(errp, "Require at least 64kiB hpt-max-page-size");
+ return;
+ }
+#endif
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
}
--
2.31.1

@ -1,4 +1,4 @@
From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001
From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 20 Aug 2021 18:25:12 +0200
Subject: qcow2: Deprecation warning when opening v2 images rw
@ -44,10 +44,10 @@ Rebase notes (6.1.0):
2 files changed, 7 insertions(+)
diff --git a/block/qcow2.c b/block/qcow2.c
index 4d6666d3ff..d2ba263e9d 100644
index 30fd53fa64..22084730f9 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOTSUP;
goto fail;
}
@ -61,7 +61,7 @@ index 4d6666d3ff..d2ba263e9d 100644
s->qcow_version = header.version;
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index cc9f1a5891..6a13757177 100644
index 6b32c7fbfa..6ddda2ee64 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -83,6 +83,7 @@ _filter_qemu()
@ -73,5 +73,5 @@ index cc9f1a5891..6a13757177 100644
}
--
2.31.1
2.39.1

@ -1,7 +1,7 @@
From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
From: Kfir Manor <kfir@daynix.com>
Date: Sun, 22 Jan 2023 17:33:07 +0200
Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo
Subject: qga/linux: add usb support to guest-get-fsinfo
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
@ -16,15 +16,19 @@ Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.co
Signed-off-by: Kfir Manor <kfir@daynix.com>
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
Patch-id: 72
Patch-present-in-specfile: True
---
qga/commands-posix.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 32493d6383..f1b2b87c13 100644
index 079689d79a..97754930c1 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
g_str_equal(driver, "sym53c8xx") ||
g_str_equal(driver, "virtio-pci") ||
g_str_equal(driver, "ahci") ||
@ -35,7 +39,7 @@ index 32493d6383..f1b2b87c13 100644
break;
}
@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
}
} else if (strcmp(driver, "nvme") == 0) {
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
@ -45,5 +49,5 @@ index 32493d6383..f1b2b87c13 100644
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
goto cleanup;
--
2.31.1
2.39.1

@ -0,0 +1,110 @@
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 15 Feb 2023 02:03:17 -0500
Subject: Add RHEL 9.2.0 compat structure
Adding compatibility bits necessary to keep 9.2.0 machine
types same after rebase to 8.0.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Rebase notes (8.0.0 rc4):
- Added migration.x-preempt-pre-7-2 compat)
---
hw/arm/virt.c | 1 +
hw/core/machine.c | 10 ++++++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 3 +++
hw/s390x/s390-virtio-ccw.c | 1 +
include/hw/boards.h | 3 +++
6 files changed, 20 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 1ae1654be5..9be53e9355 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
static void rhel920_virt_options(MachineClass *mc)
{
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5aa567fad3..0e0120b7f2 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
const char *rhel_old_machine_deprecation =
"machine types for previous major releases are deprecated";
+GlobalProperty hw_compat_rhel_9_2[] = {
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "e1000e", "migrate-timadj", "off" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "virtio-mem", "x-early-migration", "false" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "migration", "x-preempt-pre-7-2", "true" },
+};
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
+
/*
* Mostly the same as hw_compat_7_0
*/
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3e330fd36f..90fb6e2e03 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 98601bb76f..8945b69175 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.2.0";
+
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index dcd3b966b0..6a0b93c63d 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
static void ccw_machine_rhel920_class_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 5e7446ee40..5f08bd7550 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_9_2[];
+extern const size_t hw_compat_rhel_9_2_len;
+
extern GlobalProperty hw_compat_rhel_9_1[];
extern const size_t hw_compat_rhel_9_1_len;
--
2.39.1

@ -1,26 +0,0 @@
From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 9 Nov 2022 07:08:32 -0500
Subject: Addd 7.2 compat bits for RHEL 9.1 machine type
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/core/machine.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9edec1ca05..3d851d34da 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = {
{ "arm-gicv3-common", "force-8-bit-prio", "on" },
/* hw_compat_rhel_9_1 from hw_compat_7_0 */
{ "nvme-ns", "eui64-default", "on"},
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
+ { "virtio-device", "queue_reset", "false" },
};
const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
--
2.31.1

@ -0,0 +1,76 @@
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 27 Mar 2023 15:14:03 +0200
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
8.0.0 update
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/i386/pc.c | 6 ++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
include/hw/i386/pc.h | 3 +++
4 files changed, 13 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8abb1f872e..f216922cee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
};
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+GlobalProperty pc_rhel_9_2_compat[] = {
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
+ { "ICH9-LPC", "noreboot", "true" },
+};
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
+
GlobalProperty pc_rhel_9_0_compat[] = {
/* pc_rhel_9_0_compat from pc_compat_6_2 */
{ "virtio-mem", "unplugged-inaccessible", "off" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 90fb6e2e03..fc704d783f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 8945b69175..e97655616a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 4376f64a47..d218ad1628 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
extern GlobalProperty pc_rhel_compat[];
extern const size_t pc_rhel_compat_len;
+extern GlobalProperty pc_rhel_9_2_compat[];
+extern const size_t pc_rhel_9_2_compat_len;
+
extern GlobalProperty pc_rhel_9_0_compat[];
extern const size_t pc_rhel_9_0_compat_len;
--
2.39.1

@ -0,0 +1,83 @@
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Mon, 17 Apr 2023 01:24:18 -0400
Subject: Disable unwanted new devices
QEMU 8.0 adds two new device we do not want to support that can't
be disabled using configure switch.
1) ide-cf - virtual CompactFlash card
2) i2c-echo - testing echo device
Use manual disabling of the device by changing code (1) and meson configs (2).
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/ide/qdev.c | 9 +++++++++
hw/misc/meson.build | 3 ++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 1b3b4da01d..454bfa5783 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
ide_dev_initfn(dev, IDE_CD, errp);
}
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static void ide_cf_realize(IDEDevice *dev, Error **errp)
{
ide_dev_initfn(dev, IDE_CFATA, errp);
}
+#endif
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
.class_init = ide_cd_class_init,
};
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static Property ide_cf_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
.instance_size = sizeof(IDEDrive),
.class_init = ide_cf_class_init,
};
+#endif
static void ide_device_class_init(ObjectClass *klass, void *data)
{
@@ -396,7 +402,10 @@ static void ide_register_types(void)
type_register_static(&ide_bus_info);
type_register_static(&ide_hd_info);
type_register_static(&ide_cd_info);
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
type_register_static(&ide_cf_info);
+#endif
type_register_static(&ide_device_type_info);
}
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index a40245ad44..9cc5a61ed7 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
+# Disabled for Red Hat Enterprise Linux
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
--
2.39.1

@ -1,47 +0,0 @@
From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 17 Nov 2022 16:47:16 +0100
Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585
Upstream Status: n/a (rhel-only)
Add the compatibility handling for the rebase from QEMU 7.1 to 7.2,
i.e. the settings from ccw_machine_7_1_class_options() and
ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type
(earlier settings have been added by previous rebases already).
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index ba640e3d9e..97e868ada0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine)
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
}
static void ccw_machine_rhel900_class_options(MachineClass *mc)
{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
+ s390mc->max_threads = S390_MAX_CPUS;
}
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
--
2.31.1

@ -1,43 +0,0 @@
From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Wed, 23 Nov 2022 14:15:37 +0100
Subject: redhat: aarch64: add rhel9.2.0 virt machine type
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982
Upstream: RHEL only
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
hw/arm/virt.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index dfcab40a73..0a94f31dd1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void)
}
type_init(rhel_machine_init);
+static void rhel920_virt_options(MachineClass *mc)
+{
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+
static void rhel900_virt_options(MachineClass *mc)
{
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+ rhel920_virt_options(mc);
+
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
vmc->no_tcg_lpa2 = true;
}
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
+DEFINE_RHEL_MACHINE(9, 0, 0)
--
2.31.1

@ -1,62 +0,0 @@
From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 17 Nov 2022 17:03:24 +0100
Subject: redhat: Add new rhel-9.2.0 s390x machine type
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473
Upstream Status: n/a (rhel-only)
RHEL 9.2 will be an EUS release - we want to have a new machine
type here to make sure that we have a spot where we can wire up
fixes later.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 97e868ada0..aa142a1a4e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
#endif
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
+
static void ccw_machine_rhel900_instance_options(MachineState *machine)
{
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+ ccw_machine_rhel920_instance_options(machine);
+
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
}
@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc)
{ TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
};
+ ccw_machine_rhel920_class_options(mc);
+
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
s390mc->max_threads = S390_MAX_CPUS;
}
-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
static void ccw_machine_rhel860_instance_options(MachineState *machine)
{
--
2.31.1

@ -1,75 +0,0 @@
From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 17 Nov 2022 12:36:30 +0000
Subject: x86: rhel 9.2.0 machine type
Add a 9.2.0 x86 machine type, and fix up the compatibility
for 9.0.0 and older.
pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's
nothing to do there.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
hw/i386/pc_piix.c | 1 +
hw/i386/pc_q35.c | 21 ++++++++++++++++++++-
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 173a1fd10b..fc06877344 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m)
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 97c3630021..52cfe3bf45 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
}
+static void pc_q35_init_rhel920(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel920_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.2.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
+ pc_q35_machine_rhel920_options);
+
static void pc_q35_init_rhel900(MachineState *machine)
{
pc_q35_init(machine);
@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine)
static void pc_q35_machine_rhel900_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_machine_rhel_options(m);
+ pc_q35_machine_rhel920_options(m);
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.0.0";
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
--
2.31.1

@ -1,82 +0,0 @@
From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:17:23 -0500
Subject: [PATCH 30/31] KVM: keep track of running ioctls
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 138: accel: introduce accelerator blocker API
RH-Bugzilla: 1979276
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:57 2022 -0500
KVM: keep track of running ioctls
Using the new accel-blocker API, mark where ioctls are being called
in KVM. Next, we will implement the critical section that will take
care of performing memslots modifications atomically, therefore
preventing any new ioctl from running and allowing the running ones
to finish.
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/kvm/kvm-all.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..ff660fd469 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
s->sigmask_len = 8;
+ accel_blocker_init();
#ifdef KVM_CAP_SET_GUEST_DEBUG
QTAILQ_INIT(&s->kvm_sw_breakpoints);
@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
va_end(ap);
trace_kvm_vm_ioctl(type, arg);
+ accel_ioctl_begin();
ret = ioctl(s->vmfd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
va_end(ap);
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+ accel_cpu_ioctl_begin(cpu);
ret = ioctl(cpu->kvm_fd, type, arg);
+ accel_cpu_ioctl_end(cpu);
if (ret == -1) {
ret = -errno;
}
@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
va_end(ap);
trace_kvm_device_ioctl(fd, type, arg);
+ accel_ioctl_begin();
ret = ioctl(fd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
--
2.31.1

@ -1,140 +0,0 @@
From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 19 Jan 2023 18:24:24 +0100
Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in
vhost_user_read()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 146: Fix vhost-user with dpdk
RH-Bugzilla: 2155173
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos)
This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692.
The nested event loop is broken by design. It's only user was removed.
Drop the code as well so that nobody ever tries to use it again.
I had to fix a couple of trivial conflicts around return values because
of 025faa872bcf ("vhost-user: stick to -errno error return convention").
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20230119172424.478268-3-groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
hw/virtio/vhost-user.c | 65 ++++--------------------------------------
1 file changed, 5 insertions(+), 60 deletions(-)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 0ac00eb901..7cb49c50f9 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
return 0;
}
-struct vhost_user_read_cb_data {
- struct vhost_dev *dev;
- VhostUserMsg *msg;
- GMainLoop *loop;
- int ret;
-};
-
-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
- gpointer opaque)
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
- struct vhost_user_read_cb_data *data = opaque;
- struct vhost_dev *dev = data->dev;
- VhostUserMsg *msg = data->msg;
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
uint8_t *p = (uint8_t *) msg;
@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
r = vhost_user_read_header(dev, msg);
if (r < 0) {
- data->ret = r;
- goto end;
+ return r;
}
/* validate message size is sane */
@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
error_report("Failed to read msg header."
" Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
- data->ret = -EPROTO;
- goto end;
+ return -EPROTO;
}
if (msg->hdr.size) {
@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
int saved_errno = errno;
error_report("Failed to read msg payload."
" Read %d instead of %d.", r, msg->hdr.size);
- data->ret = r < 0 ? -saved_errno : -EIO;
- goto end;
+ return r < 0 ? -saved_errno : -EIO;
}
}
-end:
- g_main_loop_quit(data->loop);
- return G_SOURCE_REMOVE;
-}
-
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
-{
- struct vhost_user *u = dev->opaque;
- CharBackend *chr = u->user->chr;
- GMainContext *prev_ctxt = chr->chr->gcontext;
- GMainContext *ctxt = g_main_context_new();
- GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
- struct vhost_user_read_cb_data data = {
- .dev = dev,
- .loop = loop,
- .msg = msg,
- .ret = 0
- };
-
- /*
- * We want to be able to monitor the slave channel fd while waiting
- * for chr I/O. This requires an event loop, but we can't nest the
- * one to which chr is currently attached : its fd handlers might not
- * be prepared for re-entrancy. So we create a new one and switch chr
- * to use it.
- */
- qemu_chr_be_update_read_handlers(chr->chr, ctxt);
- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
-
- g_main_loop_run(loop);
-
- /*
- * Restore the previous event loop context. This also destroys/recreates
- * event sources : this guarantees that all pending events in the original
- * context that have been processed by the nested loop are purged.
- */
- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
-
- g_main_loop_unref(loop);
- g_main_context_unref(ctxt);
-
- return data.ret;
+ return 0;
}
static int process_message_reply(struct vhost_dev *dev,
--
2.31.1

@ -1,143 +0,0 @@
From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 19 Jan 2023 18:24:23 +0100
Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in
vhost_user_read()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 146: Fix vhost-user with dpdk
RH-Bugzilla: 2155173
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos)
This reverts commit db8a3772e300c1a656331a92da0785d81667dc81.
Motivation : this is breaking vhost-user with DPDK as reported in [0].
Received unexpected msg type. Expected 22 received 40
Fail to update device iotlb
Received unexpected msg type. Expected 40 received 22
Received unexpected msg type. Expected 22 received 11
Fail to update device iotlb
Received unexpected msg type. Expected 11 received 22
vhost VQ 1 ring restore failed: -71: Protocol error (71)
Received unexpected msg type. Expected 22 received 11
Fail to update device iotlb
Received unexpected msg type. Expected 11 received 22
vhost VQ 0 ring restore failed: -71: Protocol error (71)
unable to start vhost net: 71: falling back on userspace virtio
The failing sequence that leads to the first error is :
- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master
socket
- QEMU starts a nested event loop in order to wait for the
VHOST_USER_GET_STATUS response and to be able to process messages from
the slave channel
- DPDK sends a couple of legitimate IOTLB miss messages on the slave
channel
- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22)
updates on the master socket
- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG
but it gets the response for the VHOST_USER_GET_STATUS instead
The subsequent errors have the same root cause : the nested event loop
breaks the order by design. It lures QEMU to expect responses to the
latest message sent on the master socket to arrive first.
Since this was only needed for DAX enablement which is still not merged
upstream, just drop the code for now. A working solution will have to
be merged later on. Likely protect the master socket with a mutex
and service the slave channel with a separate thread, as discussed with
Maxime in the mail thread below.
[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/
Reported-by: Yanghang Liu <yanghliu@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20230119172424.478268-2-groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
hw/virtio/vhost-user.c | 35 +++--------------------------------
1 file changed, 3 insertions(+), 32 deletions(-)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8f635844af..0ac00eb901 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -356,35 +356,6 @@ end:
return G_SOURCE_REMOVE;
}
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
- gpointer opaque);
-
-/*
- * This updates the read handler to use a new event loop context.
- * Event sources are removed from the previous context : this ensures
- * that events detected in the previous context are purged. They will
- * be re-detected and processed in the new context.
- */
-static void slave_update_read_handler(struct vhost_dev *dev,
- GMainContext *ctxt)
-{
- struct vhost_user *u = dev->opaque;
-
- if (!u->slave_ioc) {
- return;
- }
-
- if (u->slave_src) {
- g_source_destroy(u->slave_src);
- g_source_unref(u->slave_src);
- }
-
- u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
- G_IO_IN | G_IO_HUP,
- slave_read, dev, NULL,
- ctxt);
-}
-
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
struct vhost_user *u = dev->opaque;
@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
* be prepared for re-entrancy. So we create a new one and switch chr
* to use it.
*/
- slave_update_read_handler(dev, ctxt);
qemu_chr_be_update_read_handlers(chr->chr, ctxt);
qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
* context that have been processed by the nested loop are purged.
*/
qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
- slave_update_read_handler(dev, NULL);
g_main_loop_unref(loop);
g_main_context_unref(ctxt);
@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
return -ECONNREFUSED;
}
u->slave_ioc = ioc;
- slave_update_read_handler(dev, NULL);
+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
+ G_IO_IN | G_IO_HUP,
+ slave_read, dev, NULL, NULL);
if (reply_supported) {
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
--
2.31.1

@ -1,348 +0,0 @@
From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:16:41 -0500
Subject: [PATCH 29/31] accel: introduce accelerator blocker API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 138: accel: introduce accelerator blocker API
RH-Bugzilla: 1979276
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:56 2022 -0500
accel: introduce accelerator blocker API
This API allows the accelerators to prevent vcpus from issuing
new ioctls while execting a critical section marked with the
accel_ioctl_inhibit_begin/end functions.
Note that all functions submitting ioctls must mark where the
ioctl is being called with accel_{cpu_}ioctl_begin/end().
This API requires the caller to always hold the BQL.
API documentation is in sysemu/accel-blocker.h
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
(to minimize cache line bouncing) to keep avoid that new ioctls
run when the critical section starts, and a QemuEvent to wait
that all running ioctls finish.
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts:
util/meson.build: "interval-tree.c" does not exist
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
accel/meson.build | 2 +-
hw/core/cpu-common.c | 2 +
include/hw/core/cpu.h | 3 +
include/sysemu/accel-blocker.h | 56 ++++++++++++
util/meson.build | 2 +-
6 files changed, 217 insertions(+), 2 deletions(-)
create mode 100644 accel/accel-blocker.c
create mode 100644 include/sysemu/accel-blocker.h
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
new file mode 100644
index 0000000000..1e7f423462
--- /dev/null
+++ b/accel/accel-blocker.c
@@ -0,0 +1,154 @@
+/*
+ * Lock to inhibit accelerator ioctls
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "hw/core/cpu.h"
+#include "sysemu/accel-blocker.h"
+
+static QemuLockCnt accel_in_ioctl_lock;
+static QemuEvent accel_in_ioctl_event;
+
+void accel_blocker_init(void)
+{
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
+ qemu_event_init(&accel_in_ioctl_event, false);
+}
+
+void accel_ioctl_begin(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_end(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+void accel_cpu_ioctl_begin(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
+}
+
+void accel_cpu_ioctl_end(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+static bool accel_has_to_wait(void)
+{
+ CPUState *cpu;
+ bool needs_to_wait = false;
+
+ CPU_FOREACH(cpu) {
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
+ /* exit the ioctl, if vcpu is running it */
+ qemu_cpu_kick(cpu);
+ needs_to_wait = true;
+ }
+ }
+
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_inhibit_begin(void)
+{
+ CPUState *cpu;
+
+ /*
+ * We allow to inhibit only when holding the BQL, so we can identify
+ * when an inhibitor wants to issue an ioctl easily.
+ */
+ g_assert(qemu_mutex_iothread_locked());
+
+ /* Block further invocations of the ioctls outside the BQL. */
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
+ }
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
+
+ /* Keep waiting until there are running ioctls */
+ while (true) {
+
+ /* Reset event to FREE. */
+ qemu_event_reset(&accel_in_ioctl_event);
+
+ if (accel_has_to_wait()) {
+ /*
+ * If event is still FREE, and there are ioctls still in progress,
+ * wait.
+ *
+ * If an ioctl finishes before qemu_event_wait(), it will change
+ * the event state to SET. This will prevent qemu_event_wait() from
+ * blocking, but it's not a problem because if other ioctls are
+ * still running the loop will iterate once more and reset the event
+ * status to FREE so that it can wait properly.
+ *
+ * If an ioctls finishes while qemu_event_wait() is blocking, then
+ * it will be waken up, but also here the while loop makes sure
+ * to re-enter the wait if there are other running ioctls.
+ */
+ qemu_event_wait(&accel_in_ioctl_event);
+ } else {
+ /* No ioctl is running */
+ return;
+ }
+ }
+}
+
+void accel_ioctl_inhibit_end(void)
+{
+ CPUState *cpu;
+
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
+ }
+}
+
diff --git a/accel/meson.build b/accel/meson.build
index 259c35c4c8..061332610f 100644
--- a/accel/meson.build
+++ b/accel/meson.build
@@ -1,4 +1,4 @@
-specific_ss.add(files('accel-common.c'))
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
softmmu_ss.add(files('accel-softmmu.c'))
user_ss.add(files('accel-user.c'))
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index f9fdd46b9d..8d6a4b1b65 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
cpu->nr_threads = 1;
qemu_mutex_init(&cpu->work_mutex);
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
QSIMPLEQ_INIT(&cpu->work_list);
QTAILQ_INIT(&cpu->breakpoints);
QTAILQ_INIT(&cpu->watchpoints);
@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
{
CPUState *cpu = CPU(obj);
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
qemu_mutex_destroy(&cpu->work_mutex);
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8830546121..2417597236 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -398,6 +398,9 @@ struct CPUState {
uint32_t kvm_fetch_index;
uint64_t dirty_pages;
+ /* Use by accel-block: CPU is executing an ioctl() */
+ QemuLockCnt in_ioctl_lock;
+
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
new file mode 100644
index 0000000000..72020529ef
--- /dev/null
+++ b/include/sysemu/accel-blocker.h
@@ -0,0 +1,56 @@
+/*
+ * Accelerator blocking API, to prevent new ioctls from starting and wait the
+ * running ones finish.
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
+ * release the BQL.
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ACCEL_BLOCKER_H
+#define ACCEL_BLOCKER_H
+
+#include "qemu/osdep.h"
+#include "sysemu/cpus.h"
+
+extern void accel_blocker_init(void);
+
+/*
+ * accel_{cpu_}ioctl_begin/end:
+ * Mark when ioctl is about to run or just finished.
+ *
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
+ * called, preventing new ioctls to run. They will continue only after
+ * accel_ioctl_inibith_end().
+ */
+extern void accel_ioctl_begin(void);
+extern void accel_ioctl_end(void);
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
+extern void accel_cpu_ioctl_end(CPUState *cpu);
+
+/*
+ * accel_ioctl_inhibit_begin: start critical section
+ *
+ * This function makes sure that:
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
+ * 2) wait that all ioctls that were already running reach
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
+ *
+ * This allows the caller to access shared data or perform operations without
+ * worrying of concurrent vcpus accesses.
+ */
+extern void accel_ioctl_inhibit_begin(void);
+
+/*
+ * accel_ioctl_inhibit_end: end critical section started by
+ * accel_ioctl_inhibit_begin()
+ *
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
+ */
+extern void accel_ioctl_inhibit_end(void);
+
+#endif /* ACCEL_BLOCKER_H */
diff --git a/util/meson.build b/util/meson.build
index 25b9b61f98..85a5504c4d 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))
util_ss.add(files('int128.c'))
util_ss.add(files('memalign.c'))
+util_ss.add(files('lockcnt.c'))
if have_user
util_ss.add(files('selfmap.c'))
@@ -71,7 +72,6 @@ endif
if have_block or have_ga
util_ss.add(files('aiocb.c', 'async.c'))
util_ss.add(files('base64.c'))
- util_ss.add(files('lockcnt.c'))
util_ss.add(files('main-loop.c'))
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
--
2.31.1

@ -1,58 +0,0 @@
From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 3 Feb 2023 18:15:10 +0100
Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
RH-Bugzilla: 2165280
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Shaoqin Huang <None>
RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/2165280
Upstream: yes
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041
Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore
After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
before registration"), it looks the CPUJumpCache pointer can be NULL.
This causes a SIGSEV when running debug-wp-migration kvm unit test.
At the first place it should be clarified why this TCG code is called
with KVM acceleration. This may hide another bug.
Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
accel/tcg/cputlb.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 6f1c00682b..4244b0e4e3 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
- int i, i0 = tb_jmp_cache_hash_page(page_addr);
CPUJumpCache *jc = cpu->tb_jmp_cache;
+ int i, i0;
+ if (unlikely(!jc)) {
+ return;
+ }
+
+ i0 = tb_jmp_cache_hash_page(page_addr);
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
qatomic_set(&jc->array[i0 + i].tb, NULL);
}
--
2.31.1

@ -1,15 +1,16 @@
From d110c11b5658df93533698fdb428455f5e770866 Mon Sep 17 00:00:00 2001
From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Tue, 18 Apr 2023 11:04:49 +0200
Subject: [PATCH] acpi: pcihp: allow repeating hot-unplug requests
Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests
RH-Author: Igor Mammedov <imammedo@redhat.com>
RH-MergeRequest: 280: acpi: pcihp: allow repeating hot-unplug requests
RH-Bugzilla: 2203745
RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests
RH-Bugzilla: 2087047
RH-Acked-by: Ani Sinha <None>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: Julia Suvorova <None>
RH-Commit: [1/1] e884ac48ebd43c3ebdbc65b01ce5ad75f4cb9284
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam)
with Q35 using ACPI PCI hotplug by default, user's request to unplug
device is ignored when it's issued before guest OS has been booted.
@ -58,10 +59,10 @@ Signed-off-by: Igor Mammedov <imammedo@redhat.com>
1 file changed, 10 insertions(+)
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 84d75e6b84..a2a3738b46 100644
index dcfb779a7a..cdd6f775a1 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -429,6 +429,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
* acpi_pcihp_eject_slot() when the operation is completed.
*/
pdev->qdev.pending_deleted_event = true;

@ -1,90 +0,0 @@
From 244e92fea388d2be9fe81a5c5912d92b8f599caa Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 23 Mar 2023 10:48:59 -0400
Subject: [PATCH 1/2] aio-posix: fix race between epoll upgrade and
aio_set_fd_handler()
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 292: aio-posix: fix race between epoll upgrade and aio_set_fd_handler()
RH-Bugzilla: 2211923
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Commit: [1/1] 182471bac79fa2b2ae8a34087eb6c4ab1af786e1
If another thread calls aio_set_fd_handler() while the IOThread event
loop is upgrading from ppoll(2) to epoll(7) then we might miss new
AioHandlers. The epollfd will not monitor the new AioHandler's fd,
resulting in hangs.
Take the AioHandler list lock while upgrading to epoll. This prevents
AioHandlers from changing while epoll is being set up. If we cannot lock
because we're in a nested event loop, then don't upgrade to epoll (it
will happen next time we're not in a nested call).
The downside to taking the lock is that the aio_set_fd_handler() thread
has to wait until the epoll upgrade is finished, which involves many
epoll_ctl(2) system calls. However, this scenario is rare and I couldn't
think of another solution that is still simple.
Reported-by: Qing Wang <qinwang@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Fam Zheng <fam@euphon.net>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230323144859.1338495-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit e62da98527fa35fe5f532cded01a33edf9fbe7b2)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/fdmon-epoll.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index e11a8a022e..1683aa1105 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
{
+ bool ok;
+
if (ctx->epollfd < 0) {
return false;
}
@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
return false;
}
- if (npfd >= EPOLL_ENABLE_THRESHOLD) {
- if (fdmon_epoll_try_enable(ctx)) {
- return true;
- } else {
- fdmon_epoll_disable(ctx);
- }
+ if (npfd < EPOLL_ENABLE_THRESHOLD) {
+ return false;
+ }
+
+ /* The list must not change while we add fds to epoll */
+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+ return false;
+ }
+
+ ok = fdmon_epoll_try_enable(ctx);
+
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+
+ if (!ok) {
+ fdmon_epoll_disable(ctx);
}
- return false;
+ return ok;
}
void fdmon_epoll_setup(AioContext *ctx)
--
2.39.3

@ -1,50 +0,0 @@
From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri Mar 3 11:03:52 2023 +0100
aio-wait: switch to smp_mb__after_rmw()
The barrier comes after an atomic increment, so it is enough to use
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
include/block/aio-wait.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index dd9a7f6461..da13357bb8 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
/* Increment wait_->num_waiters before evaluating cond. */ \
qatomic_inc(&wait_->num_waiters); \
/* Paired with smp_mb in aio_wait_kick(). */ \
- smp_mb(); \
+ smp_mb__after_rmw(); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
--
2.39.1

@ -0,0 +1,55 @@
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 50795ee051a342c681a9b45671c552fbd6274db8
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:13 2023 -0400
apic: disable reentrancy detection for apic-msi
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
as reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/intc/apic.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 20b5a94073..ac3d47d231 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
APIC_SPACE_SIZE);
+ /*
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
+ * safe.
+ */
+ s->io_memory.disable_reentrancy_guard = true;
+
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
local_apics[s->id] = s;
--
2.39.3

@ -0,0 +1,231 @@
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:07 2023 -0400
async: Add an optional reentrancy guard to the BH API
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
when creating new BHes. Then, the async API will toggle the guard
before/after calling the BH call-back. This prevents bh->mmio reentrancy
issues.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
[thuth: Fix "line over 90 characters" checkpatch.pl error]
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
docs/devel/multiple-iothreads.txt | 7 +++++++
include/block/aio.h | 18 ++++++++++++++++--
include/qemu/main-loop.h | 7 +++++--
tests/unit/ptimer-test-stubs.c | 3 ++-
util/async.c | 18 +++++++++++++++++-
util/main-loop.c | 6 ++++--
util/trace-events | 1 +
7 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index 343120f2ef..a3e949f6b3 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
* LEGACY timer_new_ms() - create a timer
* LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
* LEGACY qemu_aio_wait() - run an event loop iteration
Since they implicitly work on the main loop they cannot be used in code that
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
* aio_set_event_notifier() - monitor an event notifier
* aio_timer_new() - create a timer
* aio_bh_new() - create a BH
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
* aio_poll() - run an event loop iteration
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
+argument, which is used to check for and prevent re-entrancy problems. For
+BHs associated with devices, the reentrancy-guard is contained in the
+corresponding DeviceState and named "mem_reentrancy_guard".
+
The AioContext can be obtained from the IOThread using
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
Code that takes an AioContext argument works both in IOThreads or the main
diff --git a/include/block/aio.h b/include/block/aio.h
index 543717f294..db6f23c619 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -23,6 +23,8 @@
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
+#include "hw/qdev-core.h"
+
typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* is opaque and must be allocated prior to its use.
*
* @name: A human-readable identifier for debugging purposes.
+ * @reentrancy_guard: A guard set when entering a cb to prevent
+ * device-reentrancy issues
*/
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name);
+ const char *name, MemReentrancyGuard *reentrancy_guard);
/**
* aio_bh_new: Allocate a new bottom half structure
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* string.
*/
#define aio_bh_new(ctx, cb, opaque) \
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
+
+/**
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
+ * reentrancy_guard
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
+ */
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
/**
* aio_notify: Force processing of pending events.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index b3e54e00bc..68e70e61aa 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
/* internal interfaces */
+#define qemu_bh_new_guarded(cb, opaque, guard) \
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
#define qemu_bh_new(cb, opaque) \
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard);
void qemu_bh_schedule_idle(QEMUBH *bh);
enum {
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index f2bfcede93..8c9407c560 100644
--- a/tests/unit/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh = g_new(QEMUBH, 1);
diff --git a/util/async.c b/util/async.c
index 21016a1ac7..a9b528c370 100644
--- a/util/async.c
+++ b/util/async.c
@@ -65,6 +65,7 @@ struct QEMUBH {
void *opaque;
QSLIST_ENTRY(QEMUBH) next;
unsigned flags;
+ MemReentrancyGuard *reentrancy_guard;
};
/* Called concurrently from any thread */
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
}
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name)
+ const char *name, MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
.cb = cb,
.opaque = opaque,
.name = name,
+ .reentrancy_guard = reentrancy_guard,
};
return bh;
}
void aio_bh_call(QEMUBH *bh)
{
+ bool last_engaged_in_io = false;
+
+ if (bh->reentrancy_guard) {
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
+ if (bh->reentrancy_guard->engaged_in_io) {
+ trace_reentrant_aio(bh->ctx, bh->name);
+ }
+ bh->reentrancy_guard->engaged_in_io = true;
+ }
+
bh->cb(bh->opaque);
+
+ if (bh->reentrancy_guard) {
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ }
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
diff --git a/util/main-loop.c b/util/main-loop.c
index e180c85145..7022f02ef8 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
/* Functions to operate on the main QEMU AioContext. */
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
+ reentrancy_guard);
}
/*
diff --git a/util/trace-events b/util/trace-events
index 16f78d8fe5..3f7e766683 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
# async.c
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
# thread-pool.c
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
--
2.39.3

@ -0,0 +1,70 @@
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Mon May 1 10:19:56 2023 -0400
async: avoid use-after-free on re-entrancy guard
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
Fix that by keeping a local copy of the re-entrancy guard pointer.
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
util/async.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/util/async.c b/util/async.c
index a9b528c370..cd1a1815f9 100644
--- a/util/async.c
+++ b/util/async.c
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
{
bool last_engaged_in_io = false;
- if (bh->reentrancy_guard) {
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
- if (bh->reentrancy_guard->engaged_in_io) {
+ /* Make a copy of the guard-pointer as cb may free the bh */
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
+ if (reentrancy_guard) {
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
+ if (reentrancy_guard->engaged_in_io) {
trace_reentrant_aio(bh->ctx, bh->name);
}
- bh->reentrancy_guard->engaged_in_io = true;
+ reentrancy_guard->engaged_in_io = true;
}
bh->cb(bh->opaque);
- if (bh->reentrancy_guard) {
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ if (reentrancy_guard) {
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
}
}
--
2.39.3

@ -1,66 +0,0 @@
From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 6229438cca037d42f44a96d38feb15cb102a444f
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:43:52 2023 +0100
async: clarify usage of barriers in the polling case
Explain that aio_context_notifier_poll() relies on
aio_notify_accept() to catch all the memory writes that were
done before ctx->notified was set to true.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/util/async.c b/util/async.c
index 37d3e6036d..e0846baf93 100644
--- a/util/async.c
+++ b/util/async.c
@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx)
qatomic_set(&ctx->notified, false);
/*
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
- * in aio_notify.
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
+ * with smp_wmb() in aio_notify.
*/
smp_mb();
}
@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque)
EventNotifier *e = opaque;
AioContext *ctx = container_of(e, AioContext, notifier);
+ /*
+ * No need for load-acquire because we just want to kick the
+ * event loop. aio_notify_accept() takes care of synchronizing
+ * the event loop with the producers.
+ */
return qatomic_read(&ctx->notified);
}
--
2.39.1

@ -1,111 +0,0 @@
From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 11/12] async: update documentation of the memory barriers
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:15:06 2023 +0100
async: update documentation of the memory barriers
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
is happening when the bottom half is enqueued in the bh_list; not
when the flags are set. Update the documentation to match.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/util/async.c b/util/async.c
index 63434ddae4..37d3e6036d 100644
--- a/util/async.c
+++ b/util/async.c
@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
unsigned old_flags;
/*
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
- * 1. idle & any writes needed by the callback are done before the
- * locations are read in the aio_bh_poll.
- * 2. ctx is loaded before the callback has a chance to execute and bh
- * could be freed.
+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
+ * insertion starts after BH_PENDING is set.
*/
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+
if (!(old_flags & BH_PENDING)) {
+ /*
+ * At this point the bottom half becomes visible to aio_bh_poll().
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
+ * aio_bh_poll(), ensuring that:
+ * 1. any writes needed by the callback are visible from the callback
+ * after aio_bh_dequeue() returns bh.
+ * 2. ctx is loaded before the callback has a chance to execute and bh
+ * could be freed.
+ */
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
}
@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
QSLIST_REMOVE_HEAD(head, next);
/*
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
- * barrier ensures that the callback sees all writes done by the scheduling
- * thread. It also ensures that the scheduling thread sees the cleared
- * flag before bh->cb has run, and thus will call aio_notify again if
- * necessary.
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
+ * the removal finishes before BH_PENDING is reset.
*/
*flags = qatomic_fetch_and(&bh->flags,
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx)
BHListSlice *s;
int ret = 0;
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
void aio_notify(AioContext *ctx)
{
/*
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
- * aio_notify_accept.
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
+ * smp_mb() in aio_notify_accept().
*/
smp_wmb();
qatomic_set(&ctx->notified, true);
/*
- * Write ctx->notified before reading ctx->notify_me. Pairs
- * with smp_mb in aio_ctx_prepare or aio_poll.
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
*/
smp_mb();
if (qatomic_read(&ctx->notify_me)) {
--
2.39.1

@ -0,0 +1,57 @@
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
iomem
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:11 2023 -0400
bcm2835_property: disable reentrancy detection for iomem
As the code is designed for re-entrant calls from bcm2835_property to
bcm2835_mbox and back into bcm2835_property, mark iomem as
reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/misc/bcm2835_property.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
index 890ae7bae5..de056ea2df 100644
--- a/hw/misc/bcm2835_property.c
+++ b/hw/misc/bcm2835_property.c
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
TYPE_BCM2835_PROPERTY, 0x10);
+
+ /*
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
+ * iomem. As such, mark iomem as re-entracy safe.
+ */
+ s->iomem.disable_reentrancy_guard = true;
+
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
}
--
2.39.3

@ -1,250 +0,0 @@
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:06 +0100
Subject: [PATCH 24/31] block: Call drain callbacks only once
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
We only need to call both the BlockDriver's callback and the parent
callbacks when going from undrained to drained or vice versa. A second
drain section doesn't make a difference for the driver or the parent,
they weren't supposed to send new requests before and after the second
drain.
One thing that gets in the way is the 'ignore_bds_parents' parameter in
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
quiesce the parent through BdrvChildClass callbacks. If an additional
drain section is started now, bs->quiesce_counter will be non-zero, but
we would still need to quiesce the parent through BdrvChildClass in
order to keep things consistent (and unquiesce it on the matching
bdrv_drained_end(), even though the counter would not reach 0 yet as
long as the bdrv_drain_all() section is still active).
Instead of keeping track of this, let's just get rid of the parameter.
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
the root for each node, resulting in quadratic complexity. As it happens,
calling the callbacks only once solves the same problem, so as of this
patch, we'll still have O(n) complexity and ignore_bds_parents is not
needed any more.
This patch only ignores the 'ignore_bds_parents' parameter. It will be
removed in a separate patch.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 25 +++++++------------------
block/io.c | 30 ++++++++++++++++++------------
include/block/block_int-common.h | 8 ++++----
tests/unit/test-bdrv-drain.c | 16 ++++++++++------
4 files changed, 39 insertions(+), 40 deletions(-)
diff --git a/block.c b/block.c
index e0e3b21790..5a583e260d 100644
--- a/block.c
+++ b/block.c
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
{
BlockDriverState *old_bs = child->bs;
int new_bs_quiesce_counter;
- int drain_saldo;
assert(!child->frozen);
assert(old_bs != new_bs);
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
-
/*
* If the new child node is drained but the old one was not, flush
* all outstanding requests to the old child node.
*/
- while (drain_saldo > 0 && child->klass->drained_begin) {
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+ if (new_bs_quiesce_counter && !child->quiesced_parent) {
bdrv_parent_drained_begin_single(child, true);
- drain_saldo--;
}
if (old_bs) {
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
if (new_bs) {
assert_bdrv_graph_writable(new_bs);
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
-
- /*
- * Polling in bdrv_parent_drained_begin_single() may have led to the new
- * node's quiesce_counter having been decreased. Not a problem, we just
- * need to recognize this here and then invoke drained_end appropriately
- * more often.
- */
- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
-
if (child->klass->attach) {
child->klass->attach(child);
}
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
/*
* If the old child node was drained but the new one is not, allow
* requests to come in only after the new node has been attached.
+ *
+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+ * polls, which could have changed the value.
*/
- while (drain_saldo < 0 && child->klass->drained_end) {
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+ if (!new_bs_quiesce_counter && child->quiesced_parent) {
bdrv_parent_drained_end_single(child);
- drain_saldo++;
}
}
diff --git a/block/io.c b/block/io.c
index 75224480d0..87d6f22ec4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
{
IO_OR_GS_CODE();
- assert(c->parent_quiesce_counter > 0);
- c->parent_quiesce_counter--;
+ assert(c->quiesced_parent);
+ c->quiesced_parent = false;
+
if (c->klass->drained_end) {
c->klass->drained_end(c);
}
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
- c->parent_quiesce_counter++;
+
+ assert(!c->quiesced_parent);
+ c->quiesced_parent = true;
+
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
- }
- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- if (bs->drv && bs->drv->bdrv_drain_begin) {
- bs->drv->bdrv_drain_begin(bs);
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
+ bdrv_parent_drained_begin(bs, parent, false);
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ }
}
}
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- if (bs->drv && bs->drv->bdrv_drain_end) {
- bs->drv->bdrv_drain_end(bs);
- }
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
-
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
+ if (bs->drv && bs->drv->bdrv_drain_end) {
+ bs->drv->bdrv_drain_end(bs);
+ }
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
+ bdrv_parent_drained_end(bs, parent, false);
+
aio_enable_external(bdrv_get_aio_context(bs));
}
}
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 791dddfd7d..a6bc6b7fe9 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -980,13 +980,13 @@ struct BdrvChild {
bool frozen;
/*
- * How many times the parent of this child has been drained
+ * True if the parent of this child has been drained by this BdrvChild
* (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
+ *
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
* child is entering or leaving a drained section.
*/
- int parent_quiesce_counter;
+ bool quiesced_parent;
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index dda08de8db..172bc6debc 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
do_drain_begin(drain_type, bs);
- g_assert_cmpint(bs->quiesce_counter, ==, 1);
+ if (drain_type == BDRV_DRAIN_ALL) {
+ g_assert_cmpint(bs->quiesce_counter, ==, 2);
+ } else {
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
+ }
g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
do_drain_end(drain_type, bs);
@@ -348,8 +352,8 @@ static void test_nested(void)
for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
- int backing_quiesce = (outer != BDRV_DRAIN) +
- (inner != BDRV_DRAIN);
+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
+ (inner == BDRV_DRAIN_ALL);
g_assert_cmpint(bs->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
@@ -359,10 +363,10 @@ static void test_nested(void)
do_drain_begin(outer, bs);
do_drain_begin(inner, bs);
- g_assert_cmpint(bs->quiesce_counter, ==, 2);
+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
- g_assert_cmpint(s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
+ g_assert_cmpint(s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
do_drain_end(inner, bs);
do_drain_end(outer, bs);
--
2.31.1

@ -0,0 +1,354 @@
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Tue, 11 Apr 2023 19:34:16 +0200
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2174676
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
When processing vectored guest requests that are not aligned to the
storage request alignment, we pad them by adding head and/or tail
buffers for a read-modify-write cycle.
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
with this padding, the vector can exceed that limit. As of
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
limit, instead returning an error to the guest.
To the guest, this appears as a random I/O error. We should not return
an I/O error to the guest when it issued a perfectly valid request.
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
longer than IOV_MAX, which generally seems to work (because the guest
assumes a smaller alignment than we really have, file-posix's
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
so emulate the request, so that the IOV_MAX does not matter). However,
that does not seem exactly great.
I see two ways to fix this problem:
1. We split such long requests into two requests.
2. We join some elements of the vector into new buffers to make it
shorter.
I am wary of (1), because it seems like it may have unintended side
effects.
(2) on the other hand seems relatively simple to implement, with
hopefully few side effects, so this patch does that.
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
is effectively replaced by the new function bdrv_create_padded_qiov(),
which not only wraps the request IOV with padding head/tail, but also
ensures that the resulting vector will not have more than IOV_MAX
elements. Putting that functionality into qemu_iovec_init_extended() is
infeasible because it requires allocating a bounce buffer; doing so
would require many more parameters (buffer alignment, how to initialize
the buffer, and out parameters like the buffer, its length, and the
original elements), which is not reasonable.
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
functionality into bdrv_create_padded_qiov() by using public
qemu_iovec_* functions, so that is what this patch does.
Because bdrv_pad_request() was the only "serious" user of
qemu_iovec_init_extended(), the next patch will remove the latter
function, so the functionality is not implemented twice.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 151 insertions(+), 15 deletions(-)
diff --git a/block/io.c b/block/io.c
index 2e267a85ab..4e8e90208b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1439,6 +1439,14 @@ out:
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
+ * I/O vector elements so for read requests, the data can be copied back after
+ * the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
size_t head;
size_t tail;
bool merge_reads;
+ bool write;
QEMUIOVector local_qiov;
+
+ uint8_t *collapse_bounce_buf;
+ size_t collapse_len;
+ QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
+ bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
pad->tail_buf = pad->buf + pad->buf_len - align;
}
+ pad->write = write;
+
return true;
}
@@ -1547,8 +1563,23 @@ zero_mem:
return 0;
}
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
{
+ if (pad->collapse_bounce_buf) {
+ if (!pad->write) {
+ /*
+ * If padding required elements in the vector to be collapsed into a
+ * bounce buffer, copy the bounce buffer content back
+ */
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_vfree(pad->collapse_bounce_buf);
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
+ }
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
memset(pad, 0, sizeof(*pad));
}
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+ BdrvRequestPadding *pad,
+ struct iovec *iov, int niov,
+ size_t iov_offset, size_t bytes)
+{
+ int padded_niov, surplus_count, collapse_count;
+
+ /* Assert this invariant */
+ assert(niov <= IOV_MAX);
+
+ /*
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
+ * to the guest is not ideal, but there is little else we can do. At least
+ * this will practically never happen on 64-bit systems.
+ */
+ if (SIZE_MAX - pad->head < bytes ||
+ SIZE_MAX - pad->head - bytes < pad->tail)
+ {
+ return -EINVAL;
+ }
+
+ /* Length of the resulting IOV if we just concatenated everything */
+ padded_niov = !!pad->head + niov + !!pad->tail;
+
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+ if (pad->head) {
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+ }
+
+ /*
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+ * Instead, merge the first two or three elements of @iov to reduce the
+ * number of vector elements as necessary.
+ */
+ if (padded_niov > IOV_MAX) {
+ /*
+ * Only head and tail can have lead to the number of entries exceeding
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
+ * to reduce the number of elements by `surplus_count`, so we merge that
+ * many elements plus one into one element.
+ */
+ surplus_count = padded_niov - IOV_MAX;
+ assert(surplus_count <= !!pad->head + !!pad->tail);
+ collapse_count = surplus_count + 1;
+
+ /*
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+ * advance `iov` (and associated variables) by those elements.
+ */
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+ collapse_count, iov_offset, SIZE_MAX);
+ iov += collapse_count;
+ iov_offset = 0;
+ niov -= collapse_count;
+ bytes -= pad->pre_collapse_qiov.size;
+
+ /*
+ * Construct the bounce buffer to match the length of the to-collapse
+ * vector elements, and for write requests, initialize it with the data
+ * from those elements. Then add it to `pad->local_qiov`.
+ */
+ pad->collapse_len = pad->pre_collapse_qiov.size;
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+ if (pad->write) {
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_iovec_add(&pad->local_qiov,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+ if (pad->tail) {
+ qemu_iovec_add(&pad->local_qiov,
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
+ }
+
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+ return 0;
+}
+
/*
* bdrv_pad_request
*
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
+ * @write is true for write requests, false for read requests.
+ *
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
* - on function start they represent original request
* - on failure or when padding is not needed they are unchanged
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
+ bool write,
BdrvRequestPadding *pad, bool *padded,
BdrvRequestFlags *flags)
{
int ret;
+ struct iovec *sliced_iov;
+ int sliced_niov;
+ size_t sliced_head, sliced_tail;
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
*padded = false;
}
return 0;
}
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail,
- pad->tail);
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+ &sliced_head, &sliced_tail,
+ &sliced_niov);
+
+ /* Guaranteed by bdrv_check_qiov_request() */
+ assert(*bytes <= SIZE_MAX);
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+ sliced_head, *bytes);
if (ret < 0) {
- bdrv_padding_destroy(pad);
+ bdrv_padding_finalize(pad);
return ret;
}
*bytes += pad->head + pad->tail;
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- NULL, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+ &pad, NULL, &flags);
if (ret < 0) {
goto fail;
}
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
fail:
bdrv_dec_in_flight(bs);
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
/* This flag doesn't make sense for padding or zero writes */
flags &= ~BDRV_REQ_REGISTERED_BUF;
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
}
out:
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
return ret;
}
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- &padded, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+ &pad, &padded, &flags);
if (ret < 0) {
return ret;
}
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
out:
tracked_request_end(&req);
--
2.39.3

@ -0,0 +1,56 @@
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 4 May 2023 13:57:34 +0200
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
qmp_block_resize()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
This QMP handler runs in a coroutine, so it must use the corresponding
no_co_wrappers instead.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index d7b5c18f0a..eb509cf964 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
return;
}
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
if (!blk) {
return;
}
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
bdrv_co_lock(bs);
bdrv_drained_end(bs);
- blk_unref(blk);
+ blk_co_unref(blk);
bdrv_co_unlock(bs);
}
--
2.39.1

@ -1,298 +0,0 @@
From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:09 +0100
Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
In order to make sure that bdrv_replace_child_noperm() doesn't have to
poll any more, get rid of the bdrv_parent_drained_begin_single() call.
This is possible now because we can require that the parent is already
drained through the child in question when the function is called and we
don't call the parent drain callbacks more than once.
The additional drain calls needed in callers cause the test case to run
its code in the drain handler too early (bdrv_attach_child() drains
now), so modify it to only enable the code after the test setup has
completed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 103 ++++++++++++++++++++++++++++++-----
block/io.c | 2 +-
include/block/block-io.h | 8 +++
tests/unit/test-bdrv-drain.c | 10 ++++
4 files changed, 108 insertions(+), 15 deletions(-)
diff --git a/block.c b/block.c
index af31a94863..65588d313a 100644
--- a/block.c
+++ b/block.c
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
GLOBAL_STATE_CODE();
/* old_bs reference is transparently moved from @s to @s->child */
+ if (!s->child->bs) {
+ /*
+ * The parents were undrained when removing old_bs from the child. New
+ * requests can't have been made, though, because the child was empty.
+ *
+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid
+ * undraining the parent in the first place. Once this is done, having
+ * new_bs drained when calling bdrv_replace_child_tran() is not a
+ * requirement any more.
+ */
+ bdrv_parent_drained_begin_single(s->child, false);
+ assert(!bdrv_parent_drained_poll_single(s->child));
+ }
+ assert(s->child->quiesced_parent);
bdrv_replace_child_noperm(s->child, s->old_bs);
bdrv_unref(new_bs);
}
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
*
* Note: real unref of old_bs is done only on commit.
*
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
+ * kept drained until the transaction is completed.
+ *
* The function doesn't update permissions, caller is responsible for this.
*/
static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
Transaction *tran)
{
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+
+ assert(child->quiesced_parent);
+ assert(!new_bs || new_bs->quiesce_counter);
+
*s = (BdrvReplaceChildState) {
.child = child,
.old_bs = child->bs,
@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
return permissions[qapi_perm];
}
+/*
+ * Replaces the node that a BdrvChild points to without updating permissions.
+ *
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
+ * @child.
+ *
+ * This function does not poll.
+ */
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
{
@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
int new_bs_quiesce_counter;
assert(!child->frozen);
+
+ /*
+ * If we want to change the BdrvChild to point to a drained node as its new
+ * child->bs, we need to make sure that its new parent is drained, too. In
+ * other words, either child->quiesce_parent must already be true or we must
+ * be able to set it and keep the parent's quiesce_counter consistent with
+ * that, but without polling or starting new requests (this function
+ * guarantees that it doesn't poll, and starting new requests would be
+ * against the invariants of drain sections).
+ *
+ * To keep things simple, we pick the first option (child->quiesce_parent
+ * must already be true). We also generalise the rule a bit to make it
+ * easier to verify in callers and more likely to be covered in test cases:
+ * The parent must be quiesced through this child even if new_bs isn't
+ * currently drained.
+ *
+ * The only exception is for callers that always pass new_bs == NULL. In
+ * this case, we obviously never need to consider the case of a drained
+ * new_bs, so we can keep the callers simpler by allowing them not to drain
+ * the parent.
+ */
+ assert(!new_bs || child->quiesced_parent);
assert(old_bs != new_bs);
GLOBAL_STATE_CODE();
@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
- /*
- * If the new child node is drained but the old one was not, flush
- * all outstanding requests to the old child node.
- */
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
- if (new_bs_quiesce_counter && !child->quiesced_parent) {
- bdrv_parent_drained_begin_single(child, true);
- }
-
if (old_bs) {
if (child->klass->detach) {
child->klass->detach(child);
@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
/*
- * If the old child node was drained but the new one is not, allow
- * requests to come in only after the new node has been attached.
- *
- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
- * polls, which could have changed the value.
+ * If the parent was drained through this BdrvChild previously, but new_bs
+ * is not drained, allow requests to come in only after the new node has
+ * been attached.
*/
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
if (!new_bs_quiesce_counter && child->quiesced_parent) {
@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
}
bdrv_ref(child_bs);
+ /*
+ * Let every new BdrvChild start with a drained parent. Inserting the child
+ * in the graph with bdrv_replace_child_noperm() will undrain it if
+ * @child_bs is not drained.
+ *
+ * The child was only just created and is not yet visible in global state
+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
+ * could have sent requests and polling is not necessary.
+ *
+ * Note that this means that the parent isn't fully drained yet, we only
+ * stop new requests from coming in. This is fine, we don't care about the
+ * old requests here, they are not for this child. If another place enters a
+ * drain section for the same parent, but wants it to be fully quiesced, it
+ * will not run most of the the code in .drained_begin() again (which is not
+ * a problem, we already did this), but it will still poll until the parent
+ * is fully quiesced, so it will not be negatively affected either.
+ */
+ bdrv_parent_drained_begin_single(new_child, false);
bdrv_replace_child_noperm(new_child, child_bs);
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
}
if (child->bs) {
+ BlockDriverState *bs = child->bs;
+ bdrv_drained_begin(bs);
bdrv_replace_child_tran(child, NULL, tran);
+ bdrv_drained_end(bs);
}
tran_add(tran, &bdrv_remove_child_drv, child);
}
+static void undrain_on_clean_cb(void *opaque)
+{
+ bdrv_drained_end(opaque);
+}
+
+static TransactionActionDrv undrain_on_clean = {
+ .clean = undrain_on_clean_cb,
+};
+
static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
bool auto_skip, Transaction *tran,
@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
GLOBAL_STATE_CODE();
+ bdrv_drained_begin(from);
+ bdrv_drained_begin(to);
+ tran_add(tran, &undrain_on_clean, from);
+ tran_add(tran, &undrain_on_clean, to);
+
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
diff --git a/block/io.c b/block/io.c
index 5e9150d92c..ae64830eac 100644
--- a/block/io.c
+++ b/block/io.c
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
}
}
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
if (c->klass->drained_poll) {
return c->klass->drained_poll(c);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 8f5e75756a..65e6d2569b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
*/
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+/**
+ * bdrv_parent_drained_poll_single:
+ *
+ * Returns true if there is any pending activity to cease before @c can be
+ * called quiesced, false otherwise.
+ */
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
+
/**
* bdrv_parent_drained_end_single:
*
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 172bc6debc..2686a8acee 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
typedef struct BDRVReplaceTestState {
+ bool setup_completed;
bool was_drained;
bool was_undrained;
bool has_read;
@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
+ if (!s->setup_completed) {
+ return;
+ }
+
if (!s->drain_count) {
s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
bdrv_inc_in_flight(bs);
@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
+ if (!s->setup_completed) {
+ return;
+ }
+
g_assert(s->drain_count > 0);
if (!--s->drain_count) {
s->was_undrained = true;
@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
bdrv_ref(old_child_bs);
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
BDRV_CHILD_COW, &error_abort);
+ parent_s->setup_completed = true;
for (i = 0; i < old_drain_count; i++) {
bdrv_drained_begin(old_child_bs);
--
2.31.1

@ -1,54 +0,0 @@
From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:03 +0100
Subject: [PATCH 21/31] block: Don't use subtree drains in
bdrv_drop_intermediate()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s)
Instead of using a subtree drain from the top node (which also drains
child nodes of base that we're not even interested in), use a normal
drain for base, which automatically drains all of the parents, too.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-9-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index cb5e96b1cf..b3449a312e 100644
--- a/block.c
+++ b/block.c
@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
GLOBAL_STATE_CODE();
bdrv_ref(top);
- bdrv_subtree_drained_begin(top);
+ bdrv_drained_begin(base);
if (!top->drv || !base->drv) {
goto exit;
@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
ret = 0;
exit:
- bdrv_subtree_drained_end(top);
+ bdrv_drained_end(base);
bdrv_unref(top);
return ret;
}
--
2.31.1

@ -1,157 +0,0 @@
From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:02 +0100
Subject: [PATCH 20/31] block: Drain individual nodes during reopen
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s)
bdrv_reopen() and friends use subtree drains as a lazy way of covering
all the nodes they touch. Turns out that this lazy way is a lot more
complicated than just draining the nodes individually, even not
accounting for the additional complexity in the drain mechanism itself.
Simplify the code by switching to draining the individual nodes that are
already managed in the BlockReopenQueue anyway.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-8-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 16 +++++++++-------
block/replication.c | 6 ------
blockdev.c | 13 -------------
3 files changed, 9 insertions(+), 26 deletions(-)
diff --git a/block.c b/block.c
index 46df410b07..cb5e96b1cf 100644
--- a/block.c
+++ b/block.c
@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
*
* To be called with bs->aio_context locked.
*/
@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
int flags;
QemuOpts *opts;
- /* Make sure that the caller remembered to use a drained section. This is
- * important to avoid graph changes between the recursive queuing here and
- * bdrv_reopen_multiple(). */
- assert(bs->quiesce_counter > 0);
GLOBAL_STATE_CODE();
+ bdrv_drained_begin(bs);
+
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
QTAILQ_INIT(bs_queue);
@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+ aio_context_acquire(ctx);
+ bdrv_drained_end(bs_entry->state.bs);
+ aio_context_release(ctx);
+
qobject_unref(bs_entry->state.explicit_options);
qobject_unref(bs_entry->state.options);
g_free(bs_entry);
@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
GLOBAL_STATE_CODE();
- bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
if (ctx != qemu_get_aio_context()) {
@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
if (ctx != qemu_get_aio_context()) {
aio_context_acquire(ctx);
}
- bdrv_subtree_drained_end(bs);
return ret;
}
diff --git a/block/replication.c b/block/replication.c
index f1eed25e43..c62f48a874 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
}
- bdrv_subtree_drained_begin(hidden_disk->bs);
- bdrv_subtree_drained_begin(secondary_disk->bs);
-
if (s->orig_hidden_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
aio_context_acquire(ctx);
}
}
-
- bdrv_subtree_drained_end(hidden_disk->bs);
- bdrv_subtree_drained_end(secondary_disk->bs);
}
static void backup_job_cleanup(BlockDriverState *bs)
diff --git a/blockdev.c b/blockdev.c
index 3f1dec6242..8ffb3d9537 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3547,8 +3547,6 @@ fail:
void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
{
BlockReopenQueue *queue = NULL;
- GSList *drained = NULL;
- GSList *p;
/* Add each one of the BDS that we want to reopen to the queue */
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(queue, bs, qdict, false);
- drained = g_slist_prepend(drained, bs);
aio_context_release(ctx);
}
@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
fail:
bdrv_reopen_queue_free(queue);
- for (p = drained; p; p = p->next) {
- BlockDriverState *bs = p->data;
- AioContext *ctx = bdrv_get_aio_context(bs);
-
- aio_context_acquire(ctx);
- bdrv_subtree_drained_end(bs);
- aio_context_release(ctx);
- }
- g_slist_free(drained);
}
void qmp_blockdev_del(const char *node_name, Error **errp)
--
2.31.1

@ -1,96 +0,0 @@
From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:08 +0100
Subject: [PATCH 26/31] block: Drop out of coroutine in
bdrv_do_drained_begin_quiesce()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s)
The next patch adds a parent drain to bdrv_attach_child_common(), which
shouldn't be, but is currently called from coroutines in some cases (e.g.
.bdrv_co_create implementations generally open new nodes). Therefore,
the assertion that we're not in a coroutine doesn't hold true any more.
We could just remove the assertion because there is nothing in the
function that should be in conflict with running in a coroutine, but
just to be on the safe side, we can reverse the caller relationship
between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
that the latter also just drops out of coroutine context and we can
still be certain in the future that any drain code doesn't run in
coroutines.
As a nice side effect, the structure of bdrv_do_drained_begin() is now
symmetrical with bdrv_do_drained_end().
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-14-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/io.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/block/io.c b/block/io.c
index 2e9503df6a..5e9150d92c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
}
}
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool poll)
{
IO_OR_GS_CODE();
- assert(!qemu_in_coroutine());
+
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
+ return;
+ }
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
bs->drv->bdrv_drain_begin(bs);
}
}
-}
-
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool poll)
-{
- if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, parent, poll);
- return;
- }
-
- bdrv_do_drained_begin_quiesce(bs, parent);
/*
* Wait for drained requests to finish.
@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
}
}
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+{
+ bdrv_do_drained_begin(bs, parent, false);
+}
+
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
--
2.31.1

@ -1,67 +0,0 @@
From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:01 +0100
Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s)
Callers don't agree whether bdrv_reopen_queue_child() should be called
with the AioContext lock held or not. Standardise on holding the lock
(as done by QMP blockdev-reopen and the replication block driver) and
fix bdrv_reopen() to do the same.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-7-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index 7999fd08c5..46df410b07 100644
--- a/block.c
+++ b/block.c
@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
* bs_queue, or the existing bs_queue being used.
*
* bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ *
+ * To be called with bs->aio_context locked.
*/
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
return bs_queue;
}
+/* To be called with bs->aio_context locked */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
GLOBAL_STATE_CODE();
bdrv_subtree_drained_begin(bs);
+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
}
-
- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
ret = bdrv_reopen_multiple(queue, errp);
if (ctx != qemu_get_aio_context()) {
--
2.31.1

@ -0,0 +1,73 @@
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 14 Jul 2023 10:59:38 +0200
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2174676
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
which bdrv_check_qiov_request() does not guarantee.
bdrv_check_request32() however will guarantee this, and both of
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
bdrv_co_pwritev_part()) already run it before calling
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
bdrv_check_request32() without expecting error, too.
In effect, this patch will not change guest-visible behavior. It is a
clean-up to tighten a condition to match what is guaranteed by our
callers, and which exists purely to show clearly why the subsequent
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
Note there is a difference between the interfaces of
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
an errp, the latter does not, so we can no longer just pass
&error_abort. Instead, we need to check the returned value. While we
do expect success (because the callers have already run this function),
an assert(ret == 0) is not much simpler than just to return an error if
it occurs, so let us handle errors by returning them up the stack now.
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-id: 20230714085938.202730-1-hreitz@redhat.com
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
("block: Collapse padded I/O vecs exceeding IOV_MAX")
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/block/io.c b/block/io.c
index 4e8e90208b..807c9fb720 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+ /* Should have been checked by the caller already */
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+ if (ret < 0) {
+ return ret;
+ }
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
&sliced_head, &sliced_tail,
&sliced_niov);
- /* Guaranteed by bdrv_check_qiov_request() */
+ /* Guaranteed by bdrv_check_request32() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
--
2.39.3

@ -1,132 +0,0 @@
From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:53 +0200
Subject: [PATCH 06/20] block: Improve empty format-specific info dump
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s)
When a block driver supports obtaining format-specific information, but
that object only contains optional fields, it is possible that none of
them are present, so that dump_qobject() (called by
bdrv_image_info_specific_dump()) will not print anything.
The callers of bdrv_image_info_specific_dump() put a header above this
information ("Format specific information:\n"), which will look strange
when there is nothing below. Modify bdrv_image_info_specific_dump() to
print this header instead of its callers, and only if there is indeed
something to be printed.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-2-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++----
include/block/qapi.h | 3 ++-
qemu-io-cmds.c | 4 ++--
3 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index cf557e3aea..51202b470a 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict)
}
}
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
+/*
+ * Return whether dumping the given QObject with dump_qobject() would
+ * yield an empty dump, i.e. not print anything.
+ */
+static bool qobject_is_empty_dump(const QObject *obj)
+{
+ switch (qobject_type(obj)) {
+ case QTYPE_QNUM:
+ case QTYPE_QSTRING:
+ case QTYPE_QBOOL:
+ return false;
+
+ case QTYPE_QDICT:
+ return qdict_size(qobject_to(QDict, obj)) == 0;
+
+ case QTYPE_QLIST:
+ return qlist_empty(qobject_to(QList, obj));
+
+ default:
+ abort();
+ }
+}
+
+/**
+ * Dumps the given ImageInfoSpecific object in a human-readable form,
+ * prepending an optional prefix if the dump is not empty.
+ */
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+ const char *prefix)
{
QObject *obj, *data;
Visitor *v = qobject_output_visitor_new(&obj);
@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
data = qdict_get(qobject_to(QDict, obj), "data");
- dump_qobject(1, data);
+ if (!qobject_is_empty_dump(data)) {
+ if (prefix) {
+ qemu_printf("%s", prefix);
+ }
+ dump_qobject(1, data);
+ }
qobject_unref(obj);
visit_free(v);
}
@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info)
}
if (info->has_format_specific) {
- qemu_printf("Format specific information:\n");
- bdrv_image_info_specific_dump(info->format_specific);
+ bdrv_image_info_specific_dump(info->format_specific,
+ "Format specific information:\n");
}
}
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 22c7807c89..c09859ea78 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec);
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+ const char *prefix);
void bdrv_image_info_dump(ImageInfo *info);
#endif
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 952dc940f1..f4a374528e 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
return -EIO;
}
if (spec_info) {
- printf("Format specific information:\n");
- bdrv_image_info_specific_dump(spec_info);
+ bdrv_image_info_specific_dump(spec_info,
+ "Format specific information:\n");
qapi_free_ImageInfoSpecific(spec_info);
}
--
2.31.1

@ -1,81 +0,0 @@
From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:00 +0100
Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s)
bdrv_drain_invoke() has now two entirely separate cases that share no
code any more and are selected depending on a bool parameter. Each case
has only one caller. Just inline the function.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-6-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/io.c | 23 ++++++-----------------
1 file changed, 6 insertions(+), 17 deletions(-)
diff --git a/block/io.c b/block/io.c
index f4ca62b034..a25103be6f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -242,21 +242,6 @@ typedef struct {
bool ignore_bds_parents;
} BdrvCoDrainData;
-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
-{
- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
- (!begin && !bs->drv->bdrv_drain_end)) {
- return;
- }
-
- if (begin) {
- bs->drv->bdrv_drain_begin(bs);
- } else {
- bs->drv->bdrv_drain_end(bs);
- }
-}
-
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- bdrv_drain_invoke(bs, true);
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ }
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- bdrv_drain_invoke(bs, false);
+ if (bs->drv && bs->drv->bdrv_drain_end) {
+ bs->drv->bdrv_drain_end(bs);
+ }
bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
--
2.31.1

@ -1,433 +0,0 @@
From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:59 +0100
Subject: [PATCH 17/31] block: Remove drained_end_counter
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s)
drained_end_counter is unused now, nobody changes its value any more. It
can be removed.
In cases where we had two almost identical functions that only differed
in whether the caller passes drained_end_counter, or whether they would
poll for a local drained_end_counter to reach 0, these become a single
function.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221118174110.55183-5-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 5 +-
block/block-backend.c | 4 +-
block/io.c | 98 ++++++++------------------------
blockjob.c | 2 +-
include/block/block-io.h | 24 --------
include/block/block_int-common.h | 6 +-
6 files changed, 30 insertions(+), 109 deletions(-)
diff --git a/block.c b/block.c
index 16a62a329c..7999fd08c5 100644
--- a/block.c
+++ b/block.c
@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
return bdrv_drain_poll(bs, false, NULL, false);
}
-static void bdrv_child_cb_drained_end(BdrvChild *child,
- int *drained_end_counter)
+static void bdrv_child_cb_drained_end(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- bdrv_drained_end_no_poll(bs, drained_end_counter);
+ bdrv_drained_end(bs);
}
static int bdrv_child_cb_inactivate(BdrvChild *child)
diff --git a/block/block-backend.c b/block/block-backend.c
index d98a96ff37..feaf2181fa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
+static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
return busy || !!blk->in_flight;
}
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
+static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
assert(blk->quiesce_counter);
diff --git a/block/io.c b/block/io.c
index c2ed4b2af9..f4ca62b034 100644
--- a/block/io.c
+++ b/block/io.c
@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
}
}
-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
- int *drained_end_counter)
+void bdrv_parent_drained_end_single(BdrvChild *c)
{
+ IO_OR_GS_CODE();
+
assert(c->parent_quiesce_counter > 0);
c->parent_quiesce_counter--;
if (c->klass->drained_end) {
- c->klass->drained_end(c, drained_end_counter);
+ c->klass->drained_end(c);
}
}
-void bdrv_parent_drained_end_single(BdrvChild *c)
-{
- int drained_end_counter = 0;
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
- IO_OR_GS_CODE();
- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
-}
-
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents,
- int *drained_end_counter)
+ bool ignore_bds_parents)
{
BdrvChild *c;
@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
continue;
}
- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
+ bdrv_parent_drained_end_single(c);
}
}
@@ -249,12 +240,10 @@ typedef struct {
bool poll;
BdrvChild *parent;
bool ignore_bds_parents;
- int *drained_end_counter;
} BdrvCoDrainData;
/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
- int *drained_end_counter)
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
{
if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
(!begin && !bs->drv->bdrv_drain_end)) {
@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- int *drained_end_counter);
+ BdrvChild *parent, bool ignore_bds_parents);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- assert(!data->drained_end_counter);
bdrv_do_drained_begin(bs, data->recursive, data->parent,
data->ignore_bds_parents, data->poll);
} else {
assert(!data->poll);
bdrv_do_drained_end(bs, data->recursive, data->parent,
- data->ignore_bds_parents,
- data->drained_end_counter);
+ data->ignore_bds_parents);
}
aio_context_release(ctx);
} else {
@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin, bool recursive,
BdrvChild *parent,
bool ignore_bds_parents,
- bool poll,
- int *drained_end_counter)
+ bool poll)
{
BdrvCoDrainData data;
Coroutine *self = qemu_coroutine_self();
@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.parent = parent,
.ignore_bds_parents = ignore_bds_parents,
.poll = poll,
- .drained_end_counter = drained_end_counter,
};
if (bs) {
@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- bdrv_drain_invoke(bs, true, NULL);
+ bdrv_drain_invoke(bs, true);
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
- poll, NULL);
+ poll);
return;
}
@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
/**
* This function does not poll, nor must any of its recursively called
- * functions. The *drained_end_counter pointee will be incremented
- * once for every background operation scheduled, and decremented once
- * the operation settles. Therefore, the pointer must remain valid
- * until the pointee reaches 0. That implies that whoever sets up the
- * pointee has to poll until it is 0.
- *
- * We use atomic operations to access *drained_end_counter, because
- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
- * @bs may contain nodes in different AioContexts,
- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
- * regardless of which AioContext they are in.
+ * functions.
*/
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- int *drained_end_counter)
+ BdrvChild *parent, bool ignore_bds_parents)
{
BdrvChild *child;
int old_quiesce_counter;
- assert(drained_end_counter != NULL);
-
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
- false, drained_end_counter);
+ false);
return;
}
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- bdrv_drain_invoke(bs, false, drained_end_counter);
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
- drained_end_counter);
+ bdrv_drain_invoke(bs, false);
+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
assert(!ignore_bds_parents);
bs->recursive_quiesce_counter--;
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
- drained_end_counter);
+ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
}
}
}
void bdrv_drained_end(BlockDriverState *bs)
{
- int drained_end_counter = 0;
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
-{
- IO_CODE();
- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, false);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
- int drained_end_counter = 0;
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+ bdrv_do_drained_end(bs, true, NULL, false);
}
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
- int drained_end_counter = 0;
int i;
IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_end(child->bs, true, child, false,
- &drained_end_counter);
+ bdrv_do_drained_end(child->bs, true, child, false);
}
-
- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drain(BlockDriverState *bs)
@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
return;
}
@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
- int drained_end_counter = 0;
GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, true);
}
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
- int drained_end_counter = 0;
GLOBAL_STATE_CODE();
/*
@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, true);
aio_context_release(aio_context);
}
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
-
assert(bdrv_drain_all_count > 0);
bdrv_drain_all_count--;
}
diff --git a/blockjob.c b/blockjob.c
index f51d4e18f3..0ab721e139 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
}
}
-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
+static void child_job_drained_end(BdrvChild *c)
{
BlockJob *job = c->opaque;
job_resume(&job->job);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index b099d7db45..054e964c9b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced. Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles. The caller must poll until it reaches 0. The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
* bdrv_parent_drained_end_single:
*
* End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
*/
void bdrv_parent_drained_end_single(BdrvChild *c);
@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled. On one hand, that may result in graph changes. On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
*/
void bdrv_drained_end(BlockDriverState *bs);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 40d646d1ed..2b97576f6d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -939,15 +939,11 @@ struct BdrvChildClass {
* These functions must not change the graph (and therefore also must not
* call aio_poll(), which could change the graph indirectly).
*
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
* Note that this can be nested. If drained_begin() was called twice, new
* I/O is allowed only after drained_end() was called twice, too.
*/
void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+ void (*drained_end)(BdrvChild *child);
/*
* Returns whether the parent has pending requests for the child. This
--
2.31.1

@ -1,274 +0,0 @@
From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:07 +0100
Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from
drain_begin/end.
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s)
ignore_bds_parents is now ignored during drain_begin and drain_end, so
we can just remove it there. It is still a valid optimisation for
drain_all in bdrv_drained_poll(), so leave it around there.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-13-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 2 +-
block/io.c | 58 +++++++++++++++-------------------------
include/block/block-io.h | 3 +--
3 files changed, 24 insertions(+), 39 deletions(-)
diff --git a/block.c b/block.c
index 5a583e260d..af31a94863 100644
--- a/block.c
+++ b/block.c
@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- bdrv_do_drained_begin_quiesce(bs, NULL, false);
+ bdrv_do_drained_begin_quiesce(bs, NULL);
}
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
diff --git a/block/io.c b/block/io.c
index 87d6f22ec4..2e9503df6a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
bdrv_parent_drained_begin_single(c, false);
@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
}
}
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
bdrv_parent_drained_end_single(c);
@@ -242,7 +240,6 @@ typedef struct {
bool begin;
bool poll;
BdrvChild *parent;
- bool ignore_bds_parents;
} BdrvCoDrainData;
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
}
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents);
+ bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
- data->poll);
+ bdrv_do_drained_begin(bs, data->parent, data->poll);
} else {
assert(!data->poll);
- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
+ bdrv_do_drained_end(bs, data->parent);
}
aio_context_release(ctx);
} else {
@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin,
BdrvChild *parent,
- bool ignore_bds_parents,
bool poll)
{
BdrvCoDrainData data;
@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.done = false,
.begin = begin,
.parent = parent,
- .ignore_bds_parents = ignore_bds_parents,
.poll = poll,
};
@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
}
}
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents)
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
{
IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
-
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
- bdrv_parent_drained_begin(bs, parent, false);
+ bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
bs->drv->bdrv_drain_begin(bs);
}
@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll)
+ bool poll)
{
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
return;
}
- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
+ bdrv_do_drained_begin_quiesce(bs, parent);
/*
* Wait for drained requests to finish.
@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
* nodes.
*/
if (poll) {
- assert(!ignore_bds_parents);
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, NULL, false, true);
+ bdrv_do_drained_begin(bs, NULL, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions.
*/
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
int old_quiesce_counter;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
+ bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
assert(bs->quiesce_counter > 0);
@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
if (bs->drv && bs->drv->bdrv_drain_end) {
bs->drv->bdrv_drain_end(bs);
}
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
- bdrv_parent_drained_end(bs, parent, false);
-
+ bdrv_parent_drained_end(bs, parent);
aio_enable_external(bdrv_get_aio_context(bs));
}
}
@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
void bdrv_drained_end(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, NULL, false);
+ bdrv_do_drained_end(bs, NULL);
}
void bdrv_drain(BlockDriverState *bs)
@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
+ bdrv_co_yield_to_drain(NULL, true, NULL, true);
return;
}
@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, NULL, true, false);
+ bdrv_do_drained_begin(bs, NULL, false);
aio_context_release(aio_context);
}
@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
}
}
@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
aio_context_release(aio_context);
}
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 9c36a16a1f..8f5e75756a 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
* running requests to complete.
*/
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents);
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
/**
* bdrv_drained_end:
--
2.31.1

@ -1,106 +0,0 @@
From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:10 +0100
Subject: [PATCH 28/31] block: Remove poll parameter from
bdrv_parent_drained_begin_single()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s)
All callers of bdrv_parent_drained_begin_single() pass poll=false now,
so we don't need the parameter any more.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-16-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 ++--
block/io.c | 8 ++------
include/block/block-io.h | 5 ++---
3 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/block.c b/block.c
index 65588d313a..0d78711416 100644
--- a/block.c
+++ b/block.c
@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
* new_bs drained when calling bdrv_replace_child_tran() is not a
* requirement any more.
*/
- bdrv_parent_drained_begin_single(s->child, false);
+ bdrv_parent_drained_begin_single(s->child);
assert(!bdrv_parent_drained_poll_single(s->child));
}
assert(s->child->quiesced_parent);
@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
* a problem, we already did this), but it will still poll until the parent
* is fully quiesced, so it will not be negatively affected either.
*/
- bdrv_parent_drained_begin_single(new_child, false);
+ bdrv_parent_drained_begin_single(new_child);
bdrv_replace_child_noperm(new_child, child_bs);
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
diff --git a/block/io.c b/block/io.c
index ae64830eac..38e57d1f67 100644
--- a/block/io.c
+++ b/block/io.c
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
if (c == ignore) {
continue;
}
- bdrv_parent_drained_begin_single(c, false);
+ bdrv_parent_drained_begin_single(c);
}
}
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
return busy;
}
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
{
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
assert(!c->quiesced_parent);
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
- if (poll) {
- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
- }
}
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 65e6d2569b..92aaa7c1e9 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/**
* bdrv_parent_drained_begin_single:
*
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * Begin a quiesced section for the parent of @c.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+void bdrv_parent_drained_begin_single(BdrvChild *c);
/**
* bdrv_parent_drained_poll_single:
--
2.31.1

@ -1,896 +0,0 @@
From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:05 +0100
Subject: [PATCH 23/31] block: Remove subtree drains
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s)
Subtree drains are not used any more. Remove them.
After this, BdrvChildClass.attach/detach() don't poll any more.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-11-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 20 +--
block/io.c | 121 +++-----------
include/block/block-io.h | 18 +--
include/block/block_int-common.h | 1 -
include/block/block_int-io.h | 12 --
tests/unit/test-bdrv-drain.c | 261 ++-----------------------------
6 files changed, 44 insertions(+), 389 deletions(-)
diff --git a/block.c b/block.c
index 5330e89903..e0e3b21790 100644
--- a/block.c
+++ b/block.c
@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- return bdrv_drain_poll(bs, false, NULL, false);
+ return bdrv_drain_poll(bs, NULL, false);
}
static void bdrv_child_cb_drained_end(BdrvChild *child)
@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
assert(!bs->file);
bs->file = child;
}
-
- bdrv_apply_subtree_drain(child, bs);
}
static void bdrv_child_cb_detach(BdrvChild *child)
@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
bdrv_backing_detach(child);
}
- bdrv_unapply_subtree_drain(child, bs);
-
assert_bdrv_graph_writable(bs);
QLIST_REMOVE(child, next);
if (child == bs->backing) {
@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
if (old_bs) {
- /* Detach first so that the recursive drain sections coming from @child
- * are already gone and we only end the drain sections that came from
- * elsewhere. */
if (child->klass->detach) {
child->klass->detach(child);
}
@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
/*
- * Detaching the old node may have led to the new node's
- * quiesce_counter having been decreased. Not a problem, we
- * just need to recognize this here and then invoke
- * drained_end appropriately more often.
+ * Polling in bdrv_parent_drained_begin_single() may have led to the new
+ * node's quiesce_counter having been decreased. Not a problem, we just
+ * need to recognize this here and then invoke drained_end appropriately
+ * more often.
*/
assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
- /* Attach only after starting new drained sections, so that recursive
- * drain sections coming from @child don't get an extra .drained_begin
- * callback. */
if (child->klass->attach) {
child->klass->attach(child);
}
diff --git a/block/io.c b/block/io.c
index a25103be6f..75224480d0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -236,17 +236,15 @@ typedef struct {
BlockDriverState *bs;
bool done;
bool begin;
- bool recursive;
bool poll;
BdrvChild *parent;
bool ignore_bds_parents;
} BdrvCoDrainData;
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents)
{
- BdrvChild *child, *next;
IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
return true;
}
- if (recursive) {
- assert(!ignore_bds_parents);
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- if (bdrv_drain_poll(child->bs, recursive, child, false)) {
- return true;
- }
- }
- }
-
return false;
}
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
BdrvChild *ignore_parent)
{
- return bdrv_drain_poll(bs, recursive, ignore_parent, false);
+ return bdrv_drain_poll(bs, ignore_parent, false);
}
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents);
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents, bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_do_drained_begin(bs, data->recursive, data->parent,
- data->ignore_bds_parents, data->poll);
+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
+ data->poll);
} else {
assert(!data->poll);
- bdrv_do_drained_end(bs, data->recursive, data->parent,
- data->ignore_bds_parents);
+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
}
aio_context_release(ctx);
} else {
@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
- bool begin, bool recursive,
+ bool begin,
BdrvChild *parent,
bool ignore_bds_parents,
bool poll)
@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.bs = bs,
.done = false,
.begin = begin,
- .recursive = recursive,
.parent = parent,
.ignore_bds_parents = ignore_bds_parents,
.poll = poll,
@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
}
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- bool poll)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents, bool poll)
{
- BdrvChild *child, *next;
-
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
- poll);
+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
return;
}
bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
- if (recursive) {
- assert(!ignore_bds_parents);
- bs->recursive_quiesce_counter++;
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
- false);
- }
- }
-
/*
* Wait for drained requests to finish.
*
@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
*/
if (poll) {
assert(!ignore_bds_parents);
- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, false, NULL, false, true);
-}
-
-void bdrv_subtree_drained_begin(BlockDriverState *bs)
-{
- IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, true, NULL, false, true);
+ bdrv_do_drained_begin(bs, NULL, false, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions.
*/
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents)
{
- BdrvChild *child;
int old_quiesce_counter;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
- false);
+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
return;
}
assert(bs->quiesce_counter > 0);
@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
if (old_quiesce_counter == 1) {
aio_enable_external(bdrv_get_aio_context(bs));
}
-
- if (recursive) {
- assert(!ignore_bds_parents);
- bs->recursive_quiesce_counter--;
- QLIST_FOREACH(child, &bs->children, next) {
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
- }
- }
}
void bdrv_drained_end(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, false, NULL, false);
-}
-
-void bdrv_subtree_drained_end(BlockDriverState *bs)
-{
- IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, true, NULL, false);
-}
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
-{
- int i;
- IO_OR_GS_CODE();
-
- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_begin(child->bs, true, child, false, true);
- }
-}
-
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
-{
- int i;
- IO_OR_GS_CODE();
-
- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_end(child->bs, true, child, false);
- }
+ bdrv_do_drained_end(bs, NULL, false);
}
void bdrv_drain(BlockDriverState *bs)
@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void)
while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- result |= bdrv_drain_poll(bs, false, NULL, true);
+ result |= bdrv_drain_poll(bs, NULL, true);
aio_context_release(aio_context);
}
@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
return;
}
@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, false, NULL, true, false);
+ bdrv_do_drained_begin(bs, NULL, true, false);
aio_context_release(aio_context);
}
@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, false, NULL, true);
+ bdrv_do_drained_end(bs, NULL, true);
}
}
@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, false, NULL, true);
+ bdrv_do_drained_end(bs, NULL, true);
aio_context_release(aio_context);
}
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 054e964c9b..9c36a16a1f 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
*
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
* ignore the drain request because they will be drained separately (used for
@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
*
* This is part of bdrv_drained_begin.
*/
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents);
/**
* bdrv_drained_begin:
@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
/**
* bdrv_drained_end:
*
@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
*/
void bdrv_drained_end(BlockDriverState *bs);
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
#endif /* BLOCK_IO_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 2b97576f6d..791dddfd7d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -1184,7 +1184,6 @@ struct BlockDriverState {
/* Accessed with atomic ops. */
int quiesce_counter;
- int recursive_quiesce_counter;
unsigned int write_gen; /* Current data generation */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4b0b3e17ef..8bc061ebb8 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
*/
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-/*
- * "I/O or GS" API functions. These functions can run without
- * the BQL, but only in one specific iothread/main loop.
- *
- * See include/block/block-io.h for more information about
- * the "I/O or GS" API.
- */
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
#endif /* BLOCK_INT_IO_H */
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 695519ee02..dda08de8db 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
enum drain_type {
BDRV_DRAIN_ALL,
BDRV_DRAIN,
- BDRV_SUBTREE_DRAIN,
DRAIN_TYPE_MAX,
};
@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
default: g_assert_not_reached();
}
}
@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
case BDRV_DRAIN: bdrv_drained_end(bs); break;
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
default: g_assert_not_reached();
}
}
@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void)
test_drv_cb_common(BDRV_DRAIN, false);
}
-static void test_drv_cb_drain_subtree(void)
-{
- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
-}
-
static void test_drv_cb_co_drain_all(void)
{
call_in_coroutine(test_drv_cb_drain_all);
@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void)
call_in_coroutine(test_drv_cb_drain);
}
-static void test_drv_cb_co_drain_subtree(void)
-{
- call_in_coroutine(test_drv_cb_drain_subtree);
-}
-
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
{
BlockBackend *blk;
@@ -332,11 +319,6 @@ static void test_quiesce_drain(void)
test_quiesce_common(BDRV_DRAIN, false);
}
-static void test_quiesce_drain_subtree(void)
-{
- test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
-}
-
static void test_quiesce_co_drain_all(void)
{
call_in_coroutine(test_quiesce_drain_all);
@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void)
call_in_coroutine(test_quiesce_drain);
}
-static void test_quiesce_co_drain_subtree(void)
-{
- call_in_coroutine(test_quiesce_drain_subtree);
-}
-
static void test_nested(void)
{
BlockBackend *blk;
@@ -402,158 +379,6 @@ static void test_nested(void)
blk_unref(blk);
}
-static void test_multiparent(void)
-{
- BlockBackend *blk_a, *blk_b;
- BlockDriverState *bs_a, *bs_b, *backing;
- BDRVTestState *a_s, *b_s, *backing_s;
-
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
- &error_abort);
- a_s = bs_a->opaque;
- blk_insert_bs(blk_a, bs_a, &error_abort);
-
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
- &error_abort);
- b_s = bs_b->opaque;
- blk_insert_bs(blk_b, bs_b, &error_abort);
-
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
- backing_s = backing->opaque;
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
- g_assert_cmpint(a_s->drain_count, ==, 1);
- g_assert_cmpint(b_s->drain_count, ==, 1);
- g_assert_cmpint(backing_s->drain_count, ==, 1);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
- g_assert_cmpint(backing->quiesce_counter, ==, 2);
- g_assert_cmpint(a_s->drain_count, ==, 2);
- g_assert_cmpint(b_s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, 2);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
- g_assert_cmpint(a_s->drain_count, ==, 1);
- g_assert_cmpint(b_s->drain_count, ==, 1);
- g_assert_cmpint(backing_s->drain_count, ==, 1);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- bdrv_unref(backing);
- bdrv_unref(bs_a);
- bdrv_unref(bs_b);
- blk_unref(blk_a);
- blk_unref(blk_b);
-}
-
-static void test_graph_change_drain_subtree(void)
-{
- BlockBackend *blk_a, *blk_b;
- BlockDriverState *bs_a, *bs_b, *backing;
- BDRVTestState *a_s, *b_s, *backing_s;
-
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
- &error_abort);
- a_s = bs_a->opaque;
- blk_insert_bs(blk_a, bs_a, &error_abort);
-
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
- &error_abort);
- b_s = bs_b->opaque;
- blk_insert_bs(blk_b, bs_b, &error_abort);
-
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
- backing_s = backing->opaque;
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
-
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
- g_assert_cmpint(a_s->drain_count, ==, 5);
- g_assert_cmpint(b_s->drain_count, ==, 5);
- g_assert_cmpint(backing_s->drain_count, ==, 5);
-
- bdrv_set_backing_hd(bs_b, NULL, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
- g_assert_cmpint(backing->quiesce_counter, ==, 3);
- g_assert_cmpint(a_s->drain_count, ==, 3);
- g_assert_cmpint(b_s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, 3);
-
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
- g_assert_cmpint(a_s->drain_count, ==, 5);
- g_assert_cmpint(b_s->drain_count, ==, 5);
- g_assert_cmpint(backing_s->drain_count, ==, 5);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- bdrv_unref(backing);
- bdrv_unref(bs_a);
- bdrv_unref(bs_b);
- blk_unref(blk_a);
- blk_unref(blk_b);
-}
-
static void test_graph_change_drain_all(void)
{
BlockBackend *blk_a, *blk_b;
@@ -773,12 +598,6 @@ static void test_iothread_drain(void)
test_iothread_common(BDRV_DRAIN, 1);
}
-static void test_iothread_drain_subtree(void)
-{
- test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
- test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
-}
-
typedef struct TestBlockJob {
BlockJob common;
@@ -863,7 +682,6 @@ enum test_job_result {
enum test_job_drain_node {
TEST_JOB_DRAIN_SRC,
TEST_JOB_DRAIN_SRC_CHILD,
- TEST_JOB_DRAIN_SRC_PARENT,
};
static void test_blockjob_common_drain_node(enum drain_type drain_type,
@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
case TEST_JOB_DRAIN_SRC_CHILD:
drain_bs = src_backing;
break;
- case TEST_JOB_DRAIN_SRC_PARENT:
- drain_bs = src_overlay;
- break;
default:
g_assert_not_reached();
}
@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
TEST_JOB_DRAIN_SRC);
test_blockjob_common_drain_node(drain_type, use_iothread, result,
TEST_JOB_DRAIN_SRC_CHILD);
- if (drain_type == BDRV_SUBTREE_DRAIN) {
- test_blockjob_common_drain_node(drain_type, use_iothread, result,
- TEST_JOB_DRAIN_SRC_PARENT);
- }
}
static void test_blockjob_drain_all(void)
@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
}
-static void test_blockjob_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
-}
-
static void test_blockjob_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
-static void test_blockjob_error_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
-}
-
static void test_blockjob_iothread_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
}
-static void test_blockjob_iothread_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
-}
-
static void test_blockjob_iothread_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
-static void test_blockjob_iothread_error_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
-}
-
typedef struct BDRVTestTopState {
BdrvChild *wait_child;
@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
bdrv_drain(child_bs);
bdrv_unref(child_bs);
break;
- case BDRV_SUBTREE_DRAIN:
- /* Would have to ref/unref bs here for !detach_instead_of_delete, but
- * then the whole test becomes pointless because the graph changes
- * don't occur during the drain any more. */
- assert(detach_instead_of_delete);
- bdrv_subtree_drained_begin(bs);
- bdrv_subtree_drained_end(bs);
- break;
case BDRV_DRAIN_ALL:
bdrv_drain_all_begin();
bdrv_drain_all_end();
@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void)
do_test_delete_by_drain(true, BDRV_DRAIN);
}
-static void test_detach_by_drain_subtree(void)
-{
- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
-}
-
struct detach_by_parent_data {
BlockDriverState *parent_b;
@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb)
g_assert(acb != NULL);
/* Drain and check the expected result */
- bdrv_subtree_drained_begin(parent_b);
+ bdrv_drained_begin(parent_b);
+ bdrv_drained_begin(a);
+ bdrv_drained_begin(b);
+ bdrv_drained_begin(c);
g_assert(detach_by_parent_data.child_c != NULL);
@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb)
g_assert(QLIST_NEXT(child_a, next) == NULL);
g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
- g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
g_assert_cmpint(a->quiesce_counter, ==, 1);
- g_assert_cmpint(b->quiesce_counter, ==, 0);
+ g_assert_cmpint(b->quiesce_counter, ==, 1);
g_assert_cmpint(c->quiesce_counter, ==, 1);
- bdrv_subtree_drained_end(parent_b);
+ bdrv_drained_end(parent_b);
+ bdrv_drained_end(a);
+ bdrv_drained_end(b);
+ bdrv_drained_end(c);
bdrv_unref(parent_b);
blk_unref(blk);
@@ -2202,70 +1984,47 @@ int main(int argc, char **argv)
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
- test_drv_cb_drain_subtree);
g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
test_drv_cb_co_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
- g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
- test_drv_cb_co_drain_subtree);
-
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
- g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
- test_quiesce_drain_subtree);
g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
test_quiesce_co_drain_all);
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
- test_quiesce_co_drain_subtree);
g_test_add_func("/bdrv-drain/nested", test_nested);
- g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
- g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
- test_graph_change_drain_subtree);
g_test_add_func("/bdrv-drain/graph-change/drain_all",
test_graph_change_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
- g_test_add_func("/bdrv-drain/iothread/drain_subtree",
- test_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
- g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
- test_blockjob_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
test_blockjob_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/error/drain",
test_blockjob_error_drain);
- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
- test_blockjob_error_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
test_blockjob_iothread_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
test_blockjob_iothread_drain);
- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
- test_blockjob_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
test_blockjob_iothread_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
test_blockjob_iothread_error_drain);
- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
- test_blockjob_iothread_error_drain_subtree);
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
--
2.31.1

@ -1,302 +0,0 @@
From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:58 +0100
Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to
non-coroutine_fn
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s)
Polling during bdrv_drained_end() can be problematic (and in the future,
we may get cases for bdrv_drained_begin() where polling is forbidden,
and we don't care about already in-flight requests, but just want to
prevent new requests from arriving).
The .bdrv_drained_begin/end callbacks running in a coroutine is the only
reason why we have to do this polling, so make them non-coroutine
callbacks again. None of the callers actually yield any more.
This means that bdrv_drained_end() effectively doesn't poll any more,
even if AIO_WAIT_WHILE() loops are still there (their condition is false
from the beginning). This is generally not a problem, but in
test-bdrv-drain, some additional explicit aio_poll() calls need to be
added because the test case wants to verify the final state after BHs
have executed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 +--
block/io.c | 49 +++++---------------------------
block/qed.c | 6 ++--
block/throttle.c | 8 +++---
include/block/block_int-common.h | 10 ++++---
tests/unit/test-bdrv-drain.c | 18 ++++++------
6 files changed, 32 insertions(+), 63 deletions(-)
diff --git a/block.c b/block.c
index ec184150a2..16a62a329c 100644
--- a/block.c
+++ b/block.c
@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
assert(is_power_of_2(bs->bl.request_alignment));
for (i = 0; i < bs->quiesce_counter; i++) {
- if (drv->bdrv_co_drain_begin) {
- drv->bdrv_co_drain_begin(bs);
+ if (drv->bdrv_drain_begin) {
+ drv->bdrv_drain_begin(bs);
}
}
diff --git a/block/io.c b/block/io.c
index b9424024f9..c2ed4b2af9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -252,55 +252,20 @@ typedef struct {
int *drained_end_counter;
} BdrvCoDrainData;
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
-{
- BdrvCoDrainData *data = opaque;
- BlockDriverState *bs = data->bs;
-
- if (data->begin) {
- bs->drv->bdrv_co_drain_begin(bs);
- } else {
- bs->drv->bdrv_co_drain_end(bs);
- }
-
- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
- qatomic_mb_set(&data->done, true);
- if (!data->begin) {
- qatomic_dec(data->drained_end_counter);
- }
- bdrv_dec_in_flight(bs);
-
- g_free(data);
-}
-
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
int *drained_end_counter)
{
- BdrvCoDrainData *data;
-
- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
- (!begin && !bs->drv->bdrv_co_drain_end)) {
+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+ (!begin && !bs->drv->bdrv_drain_end)) {
return;
}
- data = g_new(BdrvCoDrainData, 1);
- *data = (BdrvCoDrainData) {
- .bs = bs,
- .done = false,
- .begin = begin,
- .drained_end_counter = drained_end_counter,
- };
-
- if (!begin) {
- qatomic_inc(drained_end_counter);
+ if (begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ } else {
+ bs->drv->bdrv_drain_end(bs);
}
-
- /* Make sure the driver callback completes during the polling phase for
- * drain_begin. */
- bdrv_inc_in_flight(bs);
- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
- aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
diff --git a/block/qed.c b/block/qed.c
index 013f826c44..c2691a85b1 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
assert(!s->allocating_write_reqs_plugged);
if (s->allocating_acb != NULL) {
/* Another allocating write came concurrently. This cannot happen
- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
+ * from bdrv_qed_drain_begin, but it can happen when the timer runs.
*/
qemu_co_mutex_unlock(&s->table_lock);
return false;
@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_check = bdrv_qed_co_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
+ .bdrv_drain_begin = bdrv_qed_drain_begin,
};
static void bdrv_qed_init(void)
diff --git a/block/throttle.c b/block/throttle.c
index 131eba3ab4..88851c84f4 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
reopen_state->opaque = NULL;
}
-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
+static void throttle_drain_begin(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
}
}
-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
+static void throttle_drain_end(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
assert(tgm->io_limits_disabled);
@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
.bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort,
- .bdrv_co_drain_begin = throttle_co_drain_begin,
- .bdrv_co_drain_end = throttle_co_drain_end,
+ .bdrv_drain_begin = throttle_drain_begin,
+ .bdrv_drain_end = throttle_drain_end,
.is_filter = true,
.strong_runtime_opts = throttle_strong_runtime_opts,
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 31ae91e56e..40d646d1ed 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -735,17 +735,19 @@ struct BlockDriver {
void (*bdrv_io_unplug)(BlockDriverState *bs);
/**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * bdrv_drain_begin is called if implemented in the beginning of a
* drain operation to drain and stop any internal sources of requests in
* the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
+ * bdrv_drain_end is called if implemented at the end of the drain.
*
* They should be used by the driver to e.g. manage scheduled I/O
* requests, or toggle an internal state. After the end of the drain new
* requests will continue normally.
+ *
+ * Implementations of both functions must not call aio_poll().
*/
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
+ void (*bdrv_drain_end)(BlockDriverState *bs);
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 24f34e24ad..695519ee02 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque)
bdrv_dec_in_flight(bs);
}
-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_test_drain_begin(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count++;
@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
}
}
-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_test_drain_end(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count--;
@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = {
.bdrv_close = bdrv_test_close,
.bdrv_co_preadv = bdrv_test_co_preadv,
- .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
- .bdrv_co_drain_end = bdrv_test_co_drain_end,
+ .bdrv_drain_begin = bdrv_test_drain_begin,
+ .bdrv_drain_end = bdrv_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void)
bdrv_drained_begin(bs_child);
g_assert(!job_has_completed);
bdrv_drained_end(bs_child);
+ aio_poll(qemu_get_aio_context(), false);
g_assert(job_has_completed);
bdrv_unref(bs_parents[0]);
@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void)
g_assert(!job_has_completed);
ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
+ aio_poll(qemu_get_aio_context(), false);
g_assert(ret == 0);
g_assert(job_has_completed);
@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
* .was_drained.
* Increment .drain_count.
*/
-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
* If .drain_count reaches 0 and the node has a backing file, issue a
* read request.
*/
-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_replace_test_drain_end(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = {
.bdrv_close = bdrv_replace_test_close,
.bdrv_co_preadv = bdrv_replace_test_co_preadv,
- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
+ .bdrv_drain_begin = bdrv_replace_test_drain_begin,
+ .bdrv_drain_end = bdrv_replace_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
};
--
2.31.1

@ -1,246 +0,0 @@
From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:56 +0200
Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s)
ImageInfo sometimes contains flat information, and sometimes it does
not. Split off a BlockNodeInfo struct, which only contains information
about a single node and has no link to the backing image.
We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct,
which has links to all child nodes, not just the backing node. It would
be strange to base BlockGraphInfo on ImageInfo, because then this
extended struct would have two links to the backing node (one in
BlockGraphInfo as one of all the child links, and one in ImageInfo).
Furthermore, it is quite common to ignore the backing-image field
altogether: bdrv_query_image_info() does not set it, and
bdrv_image_info_dump() does not evaluate it. That signals that we
should have different structs for describing a single node and one that
has a link to the backing image.
Still, bdrv_query_image_info() and bdrv_image_info_dump() are not
changed too much in this patch. Follow-up patches will handle them.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-5-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------
include/block/qapi.h | 3 ++
qapi/block-core.json | 24 +++++++++----
3 files changed, 85 insertions(+), 28 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index 51202b470a..e5022b4481 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
}
/**
- * bdrv_query_image_info:
- * @bs: block device to examine
- * @p_info: location to store image information
- * @errp: location to store error information
- *
- * Store "flat" image information in @p_info.
- *
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
- *
- * @p_info will be set only on success. On error, store error in @errp.
+ * Helper function for other query info functions. Store information about @bs
+ * in @info, setting @errp on error.
*/
-void bdrv_query_image_info(BlockDriverState *bs,
- ImageInfo **p_info,
- Error **errp)
+static void bdrv_do_query_node_info(BlockDriverState *bs,
+ BlockNodeInfo *info,
+ Error **errp)
{
int64_t size;
const char *backing_filename;
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
- ImageInfo *info;
aio_context_acquire(bdrv_get_aio_context(bs));
@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
bdrv_refresh_filename(bs);
- info = g_new0(ImageInfo, 1);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
info->virtual_size = size;
@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->format_specific = bdrv_get_specific_info(bs, &err);
if (err) {
error_propagate(errp, err);
- qapi_free_ImageInfo(info);
goto out;
}
info->has_format_specific = info->format_specific != NULL;
@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs,
break;
default:
error_propagate(errp, err);
- qapi_free_ImageInfo(info);
goto out;
}
- *p_info = info;
-
out:
aio_context_release(bdrv_get_aio_context(bs));
}
+/**
+ * bdrv_query_block_node_info:
+ * @bs: block node to examine
+ * @p_info: location to store node information
+ * @errp: location to store error information
+ *
+ * Store image information about @bs in @p_info.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_block_node_info(BlockDriverState *bs,
+ BlockNodeInfo **p_info,
+ Error **errp)
+{
+ BlockNodeInfo *info;
+ ERRP_GUARD();
+
+ info = g_new0(BlockNodeInfo, 1);
+ bdrv_do_query_node_info(bs, info, errp);
+ if (*errp) {
+ qapi_free_BlockNodeInfo(info);
+ return;
+ }
+
+ *p_info = info;
+}
+
+/**
+ * bdrv_query_image_info:
+ * @bs: block node to examine
+ * @p_info: location to store image information
+ * @errp: location to store error information
+ *
+ * Store "flat" image information in @p_info.
+ *
+ * "Flat" means it does *not* query backing image information,
+ * i.e. (*pinfo)->has_backing_image will be set to false and
+ * (*pinfo)->backing_image to NULL even when the image does in fact have
+ * a backing image.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_image_info(BlockDriverState *bs,
+ ImageInfo **p_info,
+ Error **errp)
+{
+ ImageInfo *info;
+ ERRP_GUARD();
+
+ info = g_new0(ImageInfo, 1);
+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
+ if (*errp) {
+ qapi_free_ImageInfo(info);
+ return;
+ }
+
+ *p_info = info;
+}
+
/* @p_info will be set only on success. */
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
Error **errp)
diff --git a/include/block/qapi.h b/include/block/qapi.h
index c09859ea78..c7de4e3fa9 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
SnapshotInfoList **p_list,
Error **errp);
+void bdrv_query_block_node_info(BlockDriverState *bs,
+ BlockNodeInfo **p_info,
+ Error **errp);
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4b9365167f..7720da0498 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -251,7 +251,7 @@
} }
##
-# @ImageInfo:
+# @BlockNodeInfo:
#
# Information about a QEMU image file
#
@@ -279,22 +279,34 @@
#
# @snapshots: list of VM snapshots
#
-# @backing-image: info of the backing image (since 1.6)
-#
# @format-specific: structure supplying additional format-specific
# information (since 1.7)
#
-# Since: 1.3
+# Since: 8.0
##
-{ 'struct': 'ImageInfo',
+{ 'struct': 'BlockNodeInfo',
'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
'*actual-size': 'int', 'virtual-size': 'int',
'*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
- '*backing-image': 'ImageInfo',
'*format-specific': 'ImageInfoSpecific' } }
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file, and potentially its backing image
+#
+# @backing-image: info of the backing image
+#
+# Since: 1.3
+##
+{ 'struct': 'ImageInfo',
+ 'base': 'BlockNodeInfo',
+ 'data': {
+ '*backing-image': 'ImageInfo'
+ } }
+
##
# @ImageCheck:
#
--
2.31.1

@ -0,0 +1,386 @@
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 4 May 2023 13:57:33 +0200
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
context
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
RH-Bugzilla: 2185688
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
These functions must not be called in coroutine context, because they
need write access to the graph.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 2 +-
block/crypto.c | 6 +++---
block/parallels.c | 6 +++---
block/qcow.c | 6 +++---
block/qcow2.c | 14 +++++++-------
block/qed.c | 6 +++---
block/vdi.c | 6 +++---
block/vhdx.c | 6 +++---
block/vmdk.c | 18 +++++++++---------
block/vpc.c | 6 +++---
include/block/block-global-state.h | 3 ++-
include/sysemu/block-backend-global-state.h | 5 ++++-
12 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/block.c b/block.c
index d79a52ca74..a48112f945 100644
--- a/block.c
+++ b/block.c
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
ret = 0;
out:
- blk_unref(blk);
+ blk_co_unref(blk);
return ret;
}
diff --git a/block/crypto.c b/block/crypto.c
index ca67289187..8fd3ad0054 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
ret = 0;
cleanup:
qcrypto_block_free(crypto);
- blk_unref(blk);
+ blk_co_unref(blk);
return ret;
}
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
ret = 0;
fail:
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
return ret;
}
@@ -730,7 +730,7 @@ fail:
bdrv_co_delete_file_noerr(bs);
}
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_QCryptoBlockCreateOptions(create_opts);
qobject_unref(cryptoopts);
return ret;
diff --git a/block/parallels.c b/block/parallels.c
index 013684801a..b49c35929e 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
ret = 0;
out:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
exit:
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
done:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qcow.c b/block/qcow.c
index 490e4f819e..a0c701f578 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
g_free(tmp);
ret = 0;
exit:
- blk_unref(qcow_blk);
- bdrv_unref(bs);
+ blk_co_unref(qcow_blk);
+ bdrv_co_unref(bs);
qcrypto_block_free(crypto);
return ret;
}
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
fail:
g_free(backing_fmt);
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 22084730f9..0b8beb8b47 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
goto out;
}
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
/*
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
}
}
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
ret = 0;
out:
- blk_unref(blk);
- bdrv_unref(bs);
- bdrv_unref(data_bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
+ bdrv_co_unref(data_bs);
return ret;
}
@@ -3949,8 +3949,8 @@ finish:
}
qobject_unref(qdict);
- bdrv_unref(bs);
- bdrv_unref(data_bs);
+ bdrv_co_unref(bs);
+ bdrv_co_unref(data_bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/qed.c b/block/qed.c
index 0705a7b4e2..aff2a2076e 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
ret = 0; /* success */
out:
g_free(l1_table);
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
}
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/vdi.c b/block/vdi.c
index f2434d6153..08331d2dd7 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
ret = 0;
exit:
- blk_unref(blk);
- bdrv_unref(bs_file);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs_file);
g_free(bmap);
return ret;
}
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
done:
qobject_unref(qdict);
qapi_free_BlockdevCreateOptions(create_options);
- bdrv_unref(bs_file);
+ bdrv_co_unref(bs_file);
return ret;
}
diff --git a/block/vhdx.c b/block/vhdx.c
index 81420722a1..00777da91a 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
ret = 0;
delete_and_exit:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
g_free(creator);
return ret;
}
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/block/vmdk.c b/block/vmdk.c
index f5f49018fe..01ca13c82b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2306,7 +2306,7 @@ exit:
if (pbb) {
*pbb = blk;
} else {
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
}
}
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
blk_bs(backing)->drv->format_name);
- blk_unref(backing);
+ blk_co_unref(backing);
ret = -EINVAL;
goto exit;
}
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
- blk_unref(backing);
+ blk_co_unref(backing);
if (ret) {
error_setg(errp, "Failed to read parent CID");
goto exit;
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
blk_bs(extent_blk)->filename);
created_size += cur_size;
extent_idx++;
- blk_unref(extent_blk);
+ blk_co_unref(extent_blk);
}
/* Check whether we got excess extents */
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
opaque, NULL);
if (extent_blk) {
- blk_unref(extent_blk);
+ blk_co_unref(extent_blk);
error_setg(errp, "List of extents contains unused extents");
ret = -EINVAL;
goto exit;
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
ret = 0;
exit:
if (blk) {
- blk_unref(blk);
+ blk_co_unref(blk);
}
g_free(desc);
g_free(parent_desc_line);
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
errp)) {
goto exit;
}
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
exit:
g_free(ext_filename);
return blk;
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
return NULL;
}
blk_set_allow_write_beyond_eof(blk, true);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
if (size != -1) {
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
if (ret) {
- blk_unref(blk);
+ blk_co_unref(blk);
blk = NULL;
}
}
diff --git a/block/vpc.c b/block/vpc.c
index b89b0ff8e2..07ddda5b99 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
}
out:
- blk_unref(blk);
- bdrv_unref(bs);
+ blk_co_unref(blk);
+ bdrv_co_unref(bs);
return ret;
}
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
fail:
qobject_unref(qdict);
- bdrv_unref(bs);
+ bdrv_co_unref(bs);
qapi_free_BlockdevCreateOptions(create_options);
return ret;
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 399200a9a3..cd4ea554bf 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
bool quiet, Error **errp);
void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index 2b6d27db7c..fa83f9389c 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
+
+void no_coroutine_fn blk_unref(BlockBackend *blk);
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
+
void blk_remove_all_bs(void);
BlockBackend *blk_by_name(const char *name);
BlockBackend *blk_next(BlockBackend *blk);
--
2.39.1

@ -0,0 +1,74 @@
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Wed, 26 Jul 2023 09:48:07 +0200
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
qemu_open() in blkio_virtio_blk_common_open() is used to open the
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
the future eventually the unix socket.
In all these cases we cannot open the path in read-only mode,
when the `read-only` option of blockdev is on, because the exchange
of IOCTL commands for example will fail.
In order to open the device read-only, we have to use the `read-only`
property of the libblkio driver as we already do in blkio_file_open().
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 3ea9841bd8..5a82c6cb1a 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
* layer through the "/dev/fdset/N" special path.
*/
if (fd_supported) {
- int open_flags;
-
- if (flags & BDRV_O_RDWR) {
- open_flags = O_RDWR;
- } else {
- open_flags = O_RDONLY;
- }
-
- fd = qemu_open(path, open_flags, errp);
+ /*
+ * `path` can contain the path of a character device
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
+ *
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
+ * is not set in the open flags, because the exchange of IOCTL commands
+ * for example will fail.
+ *
+ * In order to open the device read-only, we are using the `read-only`
+ * property of the libblkio driver in blkio_file_open().
+ */
+ fd = qemu_open(path, O_RDWR, errp);
if (fd < 0) {
return -EINVAL;
}
--
2.39.3

@ -0,0 +1,54 @@
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 25 Jul 2023 12:37:44 +0200
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
notifications enabled from the start, but from the next releases
this is no longer the case, so we have to explicitly enable them.
In fact, the libblkio documentation says they could be disabled,
so we should always enable them at the start if we want to be
sure to get completion eventfd notifications:
By default, the driver might not generate completion events for
requests so it is necessary to explicitly enable the completion
file descriptor before use:
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
I discovered this while trying a development version of libblkio:
the guest kernel hangs during boot, while probing the device.
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/block/blkio.c b/block/blkio.c
index afcec359f2..3ea9841bd8 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->bounce_bufs);
s->blkioq = blkio_get_queue(s->blkio, 0);
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
return 0;
--
2.39.3

@ -0,0 +1,67 @@
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:19 +0200
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
setting fails
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
qemu_open() fails if called with an unix domain socket in this way:
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
Since virtio-blk-vhost-user does not support fd passing, let`s always fall back
on using `path` if we fail the fd passing.
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 93a8f8fc5c..eef80e9ce5 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
* In order to open the device read-only, we are using the `read-only`
* property of the libblkio driver in blkio_file_open().
*/
- fd = qemu_open(path, O_RDWR, errp);
+ fd = qemu_open(path, O_RDWR, NULL);
if (fd < 0) {
- return -EINVAL;
+ fd_supported = false;
+ } else {
+ ret = blkio_set_int(s->blkio, "fd", fd);
+ if (ret < 0) {
+ fd_supported = false;
+ qemu_close(fd);
+ }
}
+ }
- ret = blkio_set_int(s->blkio, "fd", fd);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "failed to set fd: %s",
- blkio_get_error_msg());
- qemu_close(fd);
- return ret;
- }
- } else {
+ if (!fd_supported) {
ret = blkio_set_str(s->blkio, "path", path);
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to set path: %s",
--
2.39.3

@ -0,0 +1,205 @@
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 4 Jul 2023 14:34:36 +0200
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
RH-Bugzilla: 2213317
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
When QEMU is built with --enable-modules, the module_block.py script
parses block/*.c to find block drivers that are built as modules. The
script generates a table of block drivers called block_driver_modules[].
This table is used for block driver module loading.
The blkio.c driver uses macros to define its BlockDriver structs. This
was done to avoid code duplication but the module_block.py script is
unable to parse the macro. The result is that libblkio-based block
drivers can be built as modules but will not be found at runtime.
One fix is to make the module_block.py script or build system fancier so
it can parse C macros (e.g. by parsing the preprocessed source code). I
chose not to do this because it raises the complexity of the build,
making future issues harder to debug.
Keep things simple: use the macro to avoid duplicating BlockDriver
function pointers but define .format_name and .protocol_name manually
for each BlockDriver. This way the module_block.py is able to parse the
code.
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
because module_block.py cannot parse them either.
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230704123436.187761-1-stefanha@redhat.com
Cc: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
Conflicts:
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
1 file changed, 61 insertions(+), 57 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 6a6f20f923..afcec359f2 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -21,16 +21,6 @@
#include "block/block-io.h"
-/*
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
- * Using macros instead of typing out the string literals avoids typos.
- */
-#define DRIVER_IO_URING "io_uring"
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
-
/*
* Allocated bounce buffers are kept in a list sorted by buffer address.
*/
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
+ if (strcmp(blkio_driver, "io_uring") == 0) {
ret = blkio_io_uring_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
ret = blkio_nvme_io_uring(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
} else {
g_assert_not_reached();
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
* - truncate
*/
-#define BLKIO_DRIVER(name, ...) \
- { \
- .format_name = name, \
- .protocol_name = name, \
- .instance_size = sizeof(BDRVBlkioState), \
- .bdrv_file_open = blkio_file_open, \
- .bdrv_close = blkio_close, \
- .bdrv_co_getlength = blkio_co_getlength, \
- .bdrv_co_truncate = blkio_truncate, \
- .bdrv_co_get_info = blkio_co_get_info, \
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
- .bdrv_co_preadv = blkio_co_preadv, \
- .bdrv_co_pwritev = blkio_co_pwritev, \
- .bdrv_co_flush_to_disk = blkio_co_flush, \
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
- .bdrv_refresh_limits = blkio_refresh_limits, \
- .bdrv_register_buf = blkio_register_buf, \
- .bdrv_unregister_buf = blkio_unregister_buf, \
- __VA_ARGS__ \
- }
-
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
- DRIVER_IO_URING,
- .bdrv_needs_filename = true,
-);
-
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
- DRIVER_NVME_IO_URING,
-);
-
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VFIO_PCI
-);
+/*
+ * Do not include .format_name and .protocol_name because module_block.py
+ * does not parse macros in the source code.
+ */
+#define BLKIO_DRIVER_COMMON \
+ .instance_size = sizeof(BDRVBlkioState), \
+ .bdrv_file_open = blkio_file_open, \
+ .bdrv_close = blkio_close, \
+ .bdrv_co_getlength = blkio_co_getlength, \
+ .bdrv_co_truncate = blkio_truncate, \
+ .bdrv_co_get_info = blkio_co_get_info, \
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
+ .bdrv_co_preadv = blkio_co_preadv, \
+ .bdrv_co_pwritev = blkio_co_pwritev, \
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
+ .bdrv_refresh_limits = blkio_refresh_limits, \
+ .bdrv_register_buf = blkio_register_buf, \
+ .bdrv_unregister_buf = blkio_unregister_buf,
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VHOST_USER
-);
+/*
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
+ * consistency.
+ */
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
- DRIVER_VIRTIO_BLK_VHOST_VDPA
-);
+static BlockDriver bdrv_io_uring = {
+ .format_name = "io_uring",
+ .protocol_name = "io_uring",
+ .bdrv_needs_filename = true,
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_nvme_io_uring = {
+ .format_name = "nvme-io_uring",
+ .protocol_name = "nvme-io_uring",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
+ .format_name = "virtio-blk-vfio-pci",
+ .protocol_name = "virtio-blk-vfio-pci",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_user = {
+ .format_name = "virtio-blk-vhost-user",
+ .protocol_name = "virtio-blk-vhost-user",
+ BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
+ .format_name = "virtio-blk-vhost-vdpa",
+ .protocol_name = "virtio-blk-vhost-vdpa",
+ BLKIO_DRIVER_COMMON
+};
static void bdrv_blkio_init(void)
{
--
2.39.3

@ -0,0 +1,151 @@
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:17 +0200
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
functions
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
This is in preparation for the next patch, where for virtio-blk
drivers we need to handle the failure of blkio_connect().
Let's also rename the *_open() functions to *_connect() to make
the code reflect the changes applied.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
1 file changed, 40 insertions(+), 27 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 5a82c6cb1a..85d1eed5fb 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
}
}
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *filename = qdict_get_str(options, "filename");
BDRVBlkioState *s = bs->opaque;
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
return 0;
}
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
return 0;
}
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
- QDict *options, int flags, Error **errp)
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
}
}
+ ret = blkio_connect(s->blkio);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
qdict_del(options, "path");
return 0;
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
- if (strcmp(blkio_driver, "io_uring") == 0) {
- ret = blkio_io_uring_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
- } else {
- g_assert_not_reached();
- }
- if (ret < 0) {
- blkio_destroy(&s->blkio);
- return ret;
- }
-
if (!(flags & BDRV_O_RDWR)) {
ret = blkio_set_bool(s->blkio, "read-only", true);
if (ret < 0) {
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- ret = blkio_connect(s->blkio);
+ if (strcmp(blkio_driver, "io_uring") == 0) {
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+ } else {
+ g_assert_not_reached();
+ }
if (ret < 0) {
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
- blkio_get_error_msg());
blkio_destroy(&s->blkio);
return ret;
}
--
2.39.3

@ -0,0 +1,85 @@
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:18 +0200
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
`fd`
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
qemu_open() to support fd passing for virtio-blk") we are using
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
for all the virtio-blk-* driver.
Unfortunately that property is also available for those driver that do
not support it, such as virtio-blk-vhost-user.
So, `blkio_get_int()` is not enough to check whether the driver supports
the `fd` property or not. This is because the virito-blk common libblkio
driver only checks whether or not `fd` is set during `blkio_connect()`
and fails with -EINVAL for those transports that do not support it
(all except vhost-vdpa for now).
So let's handle the `blkio_connect()` failure, retrying it using `path`
directly.
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/block/blkio.c b/block/blkio.c
index 85d1eed5fb..93a8f8fc5c 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
}
ret = blkio_connect(s->blkio);
+ /*
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
+ * directly setting `path`.
+ */
+ if (fd_supported && ret == -EINVAL) {
+ qemu_close(fd);
+
+ /*
+ * We need to clear the `fd` property we set previously by setting
+ * it to -1.
+ */
+ ret = blkio_set_int(s->blkio, "fd", -1);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ ret = blkio_set_str(s->blkio, "path", path);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+
+ ret = blkio_connect(s->blkio);
+ }
+
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
blkio_get_error_msg());
--
2.39.3

@ -0,0 +1,49 @@
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Jul 2023 18:10:20 +0200
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
support
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
RH-Bugzilla: 2225354 2225439
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Alberto Faria <None>
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
Setting the `fd` property fails with virtio-blk-* libblkio drivers
that do not support fd passing since
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
Getting the `fd` property, on the other hand, always succeeds for
virtio-blk-* libblkio drivers even when they don't support fd passing.
This patch switches to setting the `fd` property because it is a
better mechanism for probing fd passing support than getting the `fd`
property.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blkio.c b/block/blkio.c
index eef80e9ce5..8defbf744f 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
return -EINVAL;
}
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
fd_supported = true;
}
--
2.39.3

@ -0,0 +1,108 @@
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 30 May 2023 09:19:40 +0200
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
virtio-blk
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
RH-Bugzilla: 2180076
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd
passing. Let's expose this to the user, so the management layer
can pass the file descriptor of an already opened path.
If the libblkio virtio-blk driver supports fd passing, let's always
use qemu_open() to open the `path`, so we can handle fd passing
from the management layer through the "/dev/fdset/N" special path.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/block/blkio.c b/block/blkio.c
index 0cdc99a729..6a6f20f923 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
- int ret;
+ bool fd_supported = false;
+ int fd, ret;
if (!path) {
error_setg(errp, "missing 'path' option");
return -EINVAL;
}
- ret = blkio_set_str(s->blkio, "path", path);
- qdict_del(options, "path");
- if (ret < 0) {
- error_setg_errno(errp, -ret, "failed to set path: %s",
- blkio_get_error_msg());
- return ret;
- }
-
if (!(flags & BDRV_O_NOCACHE)) {
error_setg(errp, "cache.direct=off is not supported");
return -EINVAL;
}
+
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
+ fd_supported = true;
+ }
+
+ /*
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
+ * to open the `path`, so we can handle fd passing from the management
+ * layer through the "/dev/fdset/N" special path.
+ */
+ if (fd_supported) {
+ int open_flags;
+
+ if (flags & BDRV_O_RDWR) {
+ open_flags = O_RDWR;
+ } else {
+ open_flags = O_RDONLY;
+ }
+
+ fd = qemu_open(path, open_flags, errp);
+ if (fd < 0) {
+ return -EINVAL;
+ }
+
+ ret = blkio_set_int(s->blkio, "fd", fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
+ blkio_get_error_msg());
+ qemu_close(fd);
+ return ret;
+ }
+ } else {
+ ret = blkio_set_str(s->blkio, "path", path);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "failed to set path: %s",
+ blkio_get_error_msg());
+ return ret;
+ }
+ }
+
+ qdict_del(options, "path");
+
return 0;
}
--
2.39.3

@ -0,0 +1,121 @@
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 1 May 2023 13:34:43 -0400
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
default
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
RH-Bugzilla: 2186725
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
reader_count() is a performance bottleneck because the global
aio_context_list_lock mutex causes thread contention. Put this debugging
assertion behind a new ./configure --enable-debug-graph-lock option and
disable it by default.
The --enable-debug-graph-lock option is also enabled by the more general
--enable-debug option.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/graph-lock.c | 3 +++
configure | 1 +
meson.build | 2 ++
meson_options.txt | 2 ++
scripts/meson-buildoptions.sh | 4 ++++
5 files changed, 12 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 454c31e691..259a7a0bde 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
+#endif
}
void assert_bdrv_graph_writable(void)
diff --git a/configure b/configure
index 800b5850f4..a62a3e6be9 100755
--- a/configure
+++ b/configure
@@ -806,6 +806,7 @@ for opt do
--enable-debug)
# Enable debugging options that aren't excessively noisy
debug_tcg="yes"
+ meson_option_parse --enable-debug-graph-lock ""
meson_option_parse --enable-debug-mutex ""
meson_option_add -Doptimization=0
fortify_source="no"
diff --git a/meson.build b/meson.build
index c44d05a13f..d964e741e7 100644
--- a/meson.build
+++ b/meson.build
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
have_coroutine_pool = false
endif
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
summary_info += {'malloc trim support': has_malloc_trim}
summary_info += {'membarrier': have_membarrier}
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
summary_info += {'mutex debugging': get_option('debug_mutex')}
summary_info += {'memory allocator': get_option('malloc')}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..bc857fe68b 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
option('coroutine_pool', type: 'boolean', value: true,
description: 'coroutine freelist (better performance)')
+option('debug_graph_lock', type: 'boolean', value: false,
+ description: 'graph lock debugging support')
option('debug_mutex', type: 'boolean', value: false,
description: 'mutex debugging support')
option('debug_stack_usage', type: 'boolean', value: false,
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..30e1f25259 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -21,6 +21,8 @@ meson_options_help() {
printf "%s\n" ' QEMU'
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
+ printf "%s\n" ' --enable-debug-graph-lock'
+ printf "%s\n" ' graph lock debugging support'
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
printf "%s\n" ' --enable-debug-stack-usage'
printf "%s\n" ' measure coroutine stack usage'
@@ -249,6 +251,8 @@ _meson_option_parse() {
--datadir=*) quote_sh "-Ddatadir=$2" ;;
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
--
2.39.3

@ -1,70 +0,0 @@
From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vladimir.sementsov-ogievskiy@openvz.org>
Date: Mon, 7 Nov 2022 19:35:56 +0300
Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s)
Drop this simple wrapper used only in one place. We have too many graph
modifying functions even without it.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/block.c b/block.c
index a18f052374..ec184150a2 100644
--- a/block.c
+++ b/block.c
@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs);
static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
- Transaction *tran);
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue,
@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
tran_add(tran, &bdrv_remove_child_drv, child);
}
-/*
- * A function to remove backing-chain child of @bs if exists: cow child for
- * format nodes (always .backing) and filter child for filters (may be .file or
- * .backing)
- */
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
- Transaction *tran)
-{
- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
-}
-
static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
bool auto_skip, Transaction *tran,
@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
}
if (detach_subchain) {
- bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
}
found = g_hash_table_new(NULL, NULL);
--
2.31.1

@ -1,145 +0,0 @@
From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:54 +0200
Subject: [PATCH 07/20] block/file: Add file-specific image info
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s)
Add some (optional) information that the file driver can provide for
image files, namely the extent size hint.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-3-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/file-posix.c | 30 ++++++++++++++++++++++++++++++
qapi/block-core.json | 26 ++++++++++++++++++++++++--
2 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index b9647c5ffc..df3da79aed 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
+ Error **errp)
+{
+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1);
+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
+
+ *spec_info = (ImageInfoSpecific){
+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE,
+ .u.file.data = file_info,
+ };
+
+#ifdef FS_IOC_FSGETXATTR
+ {
+ BDRVRawState *s = bs->opaque;
+ struct fsxattr attr;
+ int ret;
+
+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr);
+ if (!ret && attr.fsx_extsize != 0) {
+ file_info->has_extent_size_hint = true;
+ file_info->extent_size_hint = attr.fsx_extsize;
+ }
+ }
+#endif
+
+ return spec_info;
+}
+
static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = {
.bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
+ .bdrv_get_specific_info = raw_get_specific_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_get_specific_stats = raw_get_specific_stats,
@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
+ .bdrv_get_specific_info = raw_get_specific_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_get_specific_stats = hdev_get_specific_stats,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95ac4fa634..f5d822cbd6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -139,16 +139,29 @@
'*encryption-format': 'RbdImageEncryptionFormat'
} }
+##
+# @ImageInfoSpecificFile:
+#
+# @extent-size-hint: Extent size hint (if available)
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFile',
+ 'data': {
+ '*extent-size-hint': 'size'
+ } }
+
##
# @ImageInfoSpecificKind:
#
# @luks: Since 2.7
# @rbd: Since 6.1
+# @file: Since 8.0
#
# Since: 1.7
##
{ 'enum': 'ImageInfoSpecificKind',
- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] }
+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] }
##
# @ImageInfoSpecificQCow2Wrapper:
@@ -185,6 +198,14 @@
{ 'struct': 'ImageInfoSpecificRbdWrapper',
'data': { 'data': 'ImageInfoSpecificRbd' } }
+##
+# @ImageInfoSpecificFileWrapper:
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFileWrapper',
+ 'data': { 'data': 'ImageInfoSpecificFile' } }
+
##
# @ImageInfoSpecific:
#
@@ -199,7 +220,8 @@
'qcow2': 'ImageInfoSpecificQCow2Wrapper',
'vmdk': 'ImageInfoSpecificVmdkWrapper',
'luks': 'ImageInfoSpecificLUKSWrapper',
- 'rbd': 'ImageInfoSpecificRbdWrapper'
+ 'rbd': 'ImageInfoSpecificRbdWrapper',
+ 'file': 'ImageInfoSpecificFileWrapper'
} }
##
--
2.31.1

@ -1,206 +0,0 @@
From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:00 +0200
Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump()
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s)
In order to let qemu-img info present a block graph, add a parameter to
bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the
information of nodes below the root level can be given an indentation.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-9-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/monitor/block-hmp-cmds.c | 2 +-
block/qapi.c | 47 +++++++++++++++++++---------------
include/block/qapi.h | 5 ++--
qemu-img.c | 2 +-
qemu-io-cmds.c | 3 ++-
5 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index aa37faa601..72824d4e2e 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
monitor_printf(mon, "\nImages:\n");
image_info = inserted->image;
while (1) {
- bdrv_node_info_dump(qapi_ImageInfo_base(image_info));
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0);
if (image_info->has_backing_image) {
image_info = image_info->backing_image;
} else {
diff --git a/block/qapi.c b/block/qapi.c
index f208c21ccf..3e35603f0c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj)
* prepending an optional prefix if the dump is not empty.
*/
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
- const char *prefix)
+ const char *prefix,
+ int indentation)
{
QObject *obj, *data;
Visitor *v = qobject_output_visitor_new(&obj);
@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
data = qdict_get(qobject_to(QDict, obj), "data");
if (!qobject_is_empty_dump(data)) {
if (prefix) {
- qemu_printf("%s", prefix);
+ qemu_printf("%*s%s", indentation * 4, "", prefix);
}
- dump_qobject(1, data);
+ dump_qobject(indentation + 1, data);
}
qobject_unref(obj);
visit_free(v);
}
-void bdrv_node_info_dump(BlockNodeInfo *info)
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation)
{
char *size_buf, *dsize_buf;
+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
+
if (!info->has_actual_size) {
dsize_buf = g_strdup("unavailable");
} else {
dsize_buf = size_to_str(info->actual_size);
}
size_buf = size_to_str(info->virtual_size);
- qemu_printf("image: %s\n"
- "file format: %s\n"
- "virtual size: %s (%" PRId64 " bytes)\n"
- "disk size: %s\n",
- info->filename, info->format, size_buf,
- info->virtual_size,
- dsize_buf);
+ qemu_printf("%simage: %s\n"
+ "%sfile format: %s\n"
+ "%svirtual size: %s (%" PRId64 " bytes)\n"
+ "%sdisk size: %s\n",
+ ind_s, info->filename,
+ ind_s, info->format,
+ ind_s, size_buf, info->virtual_size,
+ ind_s, dsize_buf);
g_free(size_buf);
g_free(dsize_buf);
if (info->has_encrypted && info->encrypted) {
- qemu_printf("encrypted: yes\n");
+ qemu_printf("%sencrypted: yes\n", ind_s);
}
if (info->has_cluster_size) {
- qemu_printf("cluster_size: %" PRId64 "\n",
- info->cluster_size);
+ qemu_printf("%scluster_size: %" PRId64 "\n",
+ ind_s, info->cluster_size);
}
if (info->has_dirty_flag && info->dirty_flag) {
- qemu_printf("cleanly shut down: no\n");
+ qemu_printf("%scleanly shut down: no\n", ind_s);
}
if (info->has_backing_filename) {
- qemu_printf("backing file: %s", info->backing_filename);
+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename);
if (!info->has_full_backing_filename) {
qemu_printf(" (cannot determine actual path)");
} else if (strcmp(info->backing_filename,
@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
}
qemu_printf("\n");
if (info->has_backing_filename_format) {
- qemu_printf("backing file format: %s\n",
- info->backing_filename_format);
+ qemu_printf("%sbacking file format: %s\n",
+ ind_s, info->backing_filename_format);
}
}
if (info->has_snapshots) {
SnapshotInfoList *elem;
- qemu_printf("Snapshot list:\n");
+ qemu_printf("%sSnapshot list:\n", ind_s);
+ qemu_printf("%s", ind_s);
bdrv_snapshot_dump(NULL);
qemu_printf("\n");
@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
+ qemu_printf("%s", ind_s);
bdrv_snapshot_dump(&sn);
qemu_printf("\n");
}
@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
if (info->has_format_specific) {
bdrv_image_info_specific_dump(info->format_specific,
- "Format specific information:\n");
+ "Format specific information:\n",
+ indentation);
}
}
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 196436020e..38855f2ae9 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs,
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
- const char *prefix);
-void bdrv_node_info_dump(BlockNodeInfo *info);
+ const char *prefix,
+ int indentation);
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation);
#endif
diff --git a/qemu-img.c b/qemu-img.c
index 3b2ca3bbcb..30b4ea58bb 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2859,7 +2859,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list)
}
delim = true;
- bdrv_node_info_dump(elem->value);
+ bdrv_node_info_dump(elem->value, 0);
}
}
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index f4a374528e..fdcb89211b 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
}
if (spec_info) {
bdrv_image_info_specific_dump(spec_info,
- "Format specific information:\n");
+ "Format specific information:\n",
+ 0);
qapi_free_ImageInfoSpecific(spec_info);
}
--
2.31.1

@ -1,155 +0,0 @@
From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:59 +0200
Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s)
Introduce a new QAPI type BlockGraphInfo and an associated
bdrv_query_block_graph_info() function that recursively gathers
BlockNodeInfo objects through a block graph.
A follow-up patch is going to make "qemu-img info" use this to print
information about all nodes that are (usually implicitly) opened for a
given image file.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-8-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
include/block/qapi.h | 3 +++
qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++
3 files changed, 86 insertions(+)
diff --git a/block/qapi.c b/block/qapi.c
index 5d0a8d2ce3..f208c21ccf 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -411,6 +411,54 @@ fail:
qapi_free_ImageInfo(info);
}
+/**
+ * bdrv_query_block_graph_info:
+ * @bs: root node to start from
+ * @p_info: location to store image information
+ * @errp: location to store error information
+ *
+ * Store image information about the graph starting from @bs in @p_info.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_block_graph_info(BlockDriverState *bs,
+ BlockGraphInfo **p_info,
+ Error **errp)
+{
+ BlockGraphInfo *info;
+ BlockChildInfoList **children_list_tail;
+ BdrvChild *c;
+ ERRP_GUARD();
+
+ info = g_new0(BlockGraphInfo, 1);
+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
+ if (*errp) {
+ goto fail;
+ }
+
+ children_list_tail = &info->children;
+
+ QLIST_FOREACH(c, &bs->children, next) {
+ BlockChildInfo *c_info;
+
+ c_info = g_new0(BlockChildInfo, 1);
+ QAPI_LIST_APPEND(children_list_tail, c_info);
+
+ c_info->name = g_strdup(c->name);
+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp);
+ if (*errp) {
+ goto fail;
+ }
+ }
+
+ *p_info = info;
+ return;
+
+fail:
+ assert(*errp != NULL);
+ qapi_free_BlockGraphInfo(info);
+}
+
/* @p_info will be set only on success. */
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
Error **errp)
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 2174bf8fa2..196436020e 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
bool flat,
bool skip_implicit_filters,
Error **errp);
+void bdrv_query_block_graph_info(BlockDriverState *bs,
+ BlockGraphInfo **p_info,
+ Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4cf2deeb6c..d703e0fb16 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -307,6 +307,41 @@
'*backing-image': 'ImageInfo'
} }
+##
+# @BlockChildInfo:
+#
+# Information about all nodes in the block graph starting at some node,
+# annotated with information about that node in relation to its parent.
+#
+# @name: Child name of the root node in the BlockGraphInfo struct, in its role
+# as the child of some undescribed parent node
+#
+# @info: Block graph information starting at this node
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockChildInfo',
+ 'data': {
+ 'name': 'str',
+ 'info': 'BlockGraphInfo'
+ } }
+
+##
+# @BlockGraphInfo:
+#
+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo
+# data.
+# The base BlockNodeInfo struct contains the information for the (sub)graph's
+# root node.
+#
+# @children: Array of links to this node's child nodes' information
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockGraphInfo',
+ 'base': 'BlockNodeInfo',
+ 'data': { 'children': ['BlockChildInfo'] } }
+
##
# @ImageCheck:
#
--
2.31.1

@ -1,197 +0,0 @@
From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:58 +0200
Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s)
There is no real reason why bdrv_query_image_info() should generally not
recurse. The ImageInfo struct has a pointer to the backing image, so it
should generally be filled, unless the caller explicitly opts out.
This moves the recursing code from bdrv_block_device_info() into
bdrv_query_image_info().
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-7-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58)
Conflicts:
block/qapi.c: Conflicts with
54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide
redundant has_FOO in generated C"), which dropped
`has_backing_image`. Without that commit (and 44ea9d9be before it),
we still need to set `has_backing_image` in
`bdrv_query_image_info()`.
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 94 +++++++++++++++++++++++++++-----------------
include/block/qapi.h | 2 +
2 files changed, 59 insertions(+), 37 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index ad88bf9b38..5d0a8d2ce3 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
Error **errp)
{
ImageInfo **p_image_info;
+ ImageInfo *backing_info;
BlockDriverState *bs0, *backing;
BlockDeviceInfo *info;
+ ERRP_GUARD();
if (!bs->drv) {
error_setg(errp, "Block device %s is ejected", bs->node_name);
@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
bs0 = bs;
p_image_info = &info->image;
info->backing_file_depth = 0;
- while (1) {
- Error *local_err = NULL;
- bdrv_query_image_info(bs0, p_image_info, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- qapi_free_BlockDeviceInfo(info);
- return NULL;
- }
-
- /* stop gathering data for flat output */
- if (flat) {
- break;
- }
- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
- /*
- * Put any filtered child here (for backwards compatibility to when
- * we put bs0->backing here, which might be any filtered child).
- */
- info->backing_file_depth++;
- bs0 = bdrv_filter_or_cow_bs(bs0);
- (*p_image_info)->has_backing_image = true;
- p_image_info = &((*p_image_info)->backing_image);
- } else {
- break;
- }
+ /*
+ * Skip automatically inserted nodes that the user isn't aware of for
+ * query-block (blk != NULL), but not for query-named-block-nodes
+ */
+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+ if (*errp) {
+ qapi_free_BlockDeviceInfo(info);
+ return NULL;
+ }
- /* Skip automatically inserted nodes that the user isn't aware of for
- * query-block (blk != NULL), but not for query-named-block-nodes */
- if (blk) {
- bs0 = bdrv_skip_implicit_filters(bs0);
- }
+ backing_info = info->image->backing_image;
+ while (backing_info) {
+ info->backing_file_depth++;
+ backing_info = backing_info->backing_image;
}
return info;
@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
* bdrv_query_image_info:
* @bs: block node to examine
* @p_info: location to store image information
+ * @flat: skip backing node information
+ * @skip_implicit_filters: skip implicit filters in the backing chain
* @errp: location to store error information
*
- * Store "flat" image information in @p_info.
+ * Store image information in @p_info, potentially recursively covering the
+ * backing chain.
*
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
+ * If @flat is true, do not query backing image information, i.e.
+ * (*p_info)->has_backing_image will be set to false and
+ * (*p_info)->backing_image to NULL even when the image does in fact have a
+ * backing image.
+ *
+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain
+ * will be skipped when querying backing image information.
+ * (@skip_implicit_filters is ignored when @flat is true.)
*
* @p_info will be set only on success. On error, store error in @errp.
*/
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
+ bool flat,
+ bool skip_implicit_filters,
Error **errp)
{
ImageInfo *info;
@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs,
info = g_new0(ImageInfo, 1);
bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
if (*errp) {
- qapi_free_ImageInfo(info);
- return;
+ goto fail;
+ }
+
+ if (!flat) {
+ BlockDriverState *backing;
+
+ /*
+ * Use any filtered child here (for backwards compatibility to when
+ * we always took bs->backing, which might be any filtered child).
+ */
+ backing = bdrv_filter_or_cow_bs(bs);
+ if (skip_implicit_filters) {
+ backing = bdrv_skip_implicit_filters(backing);
+ }
+
+ if (backing) {
+ bdrv_query_image_info(backing, &info->backing_image, false,
+ skip_implicit_filters, errp);
+ if (*errp) {
+ goto fail;
+ }
+ info->has_backing_image = true;
+ }
}
*p_info = info;
+ return;
+
+fail:
+ assert(*errp);
+ qapi_free_ImageInfo(info);
}
/* @p_info will be set only on success. */
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 22198dcd0c..2174bf8fa2 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
Error **errp);
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
+ bool flat,
+ bool skip_implicit_filters,
Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
--
2.31.1

@ -1,99 +0,0 @@
From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 14 Feb 2023 18:16:21 +0100
Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in
bdrv_append()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append()
RH-Bugzilla: 2168209
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s)
bdrv_append() is called with bs_top AioContext held, but
bdrv_attach_child_noperm() could change the AioContext of bs_top.
bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from
commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()").
bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock
is taken, so let's temporarily hold the new AioContext to prevent QEMU
from failing in BDRV_POLL_WHILE when it tries to release the wrong
AioContext.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209
Reported-by: Aihua Liang <aliang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20230214171621.11574-1-sgarzare@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/block.c b/block.c
index 0d78711416..9e1dcb9e47 100644
--- a/block.c
+++ b/block.c
@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
* child.
*
* This function does not create any image files.
+ *
+ * The caller must hold the AioContext lock for @bs_top.
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
int ret;
BdrvChild *child;
Transaction *tran = tran_new();
+ AioContext *old_context, *new_context = NULL;
GLOBAL_STATE_CODE();
assert(!bs_new->backing);
+ old_context = bdrv_get_aio_context(bs_top);
+
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
&child_of_bds, bdrv_backing_role(bs_new),
tran, errp);
@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
goto out;
}
+ /*
+ * bdrv_attach_child_noperm could change the AioContext of bs_top.
+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
+ * that assumes the new lock is taken.
+ */
+ new_context = bdrv_get_aio_context(bs_top);
+
+ if (old_context != new_context) {
+ aio_context_release(old_context);
+ aio_context_acquire(new_context);
+ }
+
ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
if (ret < 0) {
goto out;
@@ -5306,6 +5324,11 @@ out:
bdrv_refresh_limits(bs_top, NULL, NULL);
+ if (new_context && old_context != new_context) {
+ aio_context_release(new_context);
+ aio_context_acquire(old_context);
+ }
+
return ret;
}
--
2.31.1

@ -1,140 +0,0 @@
From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:55 +0200
Subject: [PATCH 08/20] block/vmdk: Change extent info type
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s)
VMDK's implementation of .bdrv_get_specific_info() returns information
about its extent files, ostensibly in the form of ImageInfo objects.
However, it does not get this information through
bdrv_query_image_info(), but fills only a select few fields with custom
information that does not always match the fields' purposes.
For example, @format, which is supposed to be a block driver name, is
filled with the extent type, e.g. SPARSE or FLAT.
In ImageInfo, @compressed shows whether the data that can be seen in the
image is stored in compressed form or not. For example, a compressed
qcow2 image will store compressed data in its data file, but when
accessing the qcow2 node, you will see normal data. This is not how
VMDK uses the @compressed field for its extent files: Instead, it
signifies whether accessing the extent file will yield compressed data
(which the VMDK driver then (de-)compresses).
Create a new structure to represent the extent information. This allows
us to clarify the fields' meanings, and it clearly shows that these are
not complete ImageInfo objects. (That is, if a user wants an extent
file's ImageInfo object, they will need to query it separately, and will
not get it from ImageInfoSpecificVmdk.extents.)
Note that this removes the last use of ['ImageInfo'] (i.e. an array of
ImageInfo objects), so the QAPI generator will no longer generate
ImageInfoList by default. However, we use it in qemu-img.c, so we need
to create a dummy object to force the generate to create that type,
similarly to DummyForceArrays in machine.json (introduced in commit
9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array
types")).
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-4-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/vmdk.c | 8 ++++----
qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/block/vmdk.c b/block/vmdk.c
index 26376352b9..4435b9880b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
return 1;
}
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
{
- ImageInfo *info = g_new0(ImageInfo, 1);
+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
bdrv_refresh_filename(extent->file->bs);
- *info = (ImageInfo){
+ *info = (VmdkExtentInfo){
.filename = g_strdup(extent->file->bs->filename),
.format = g_strdup(extent->type),
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
int i;
BDRVVmdkState *s = bs->opaque;
ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
- ImageInfoList **tail;
+ VmdkExtentInfoList **tail;
*spec_info = (ImageInfoSpecific){
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f5d822cbd6..4b9365167f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -124,7 +124,33 @@
'create-type': 'str',
'cid': 'int',
'parent-cid': 'int',
- 'extents': ['ImageInfo']
+ 'extents': ['VmdkExtentInfo']
+ } }
+
+##
+# @VmdkExtentInfo:
+#
+# Information about a VMDK extent file
+#
+# @filename: Name of the extent file
+#
+# @format: Extent type (e.g. FLAT or SPARSE)
+#
+# @virtual-size: Number of bytes covered by this extent
+#
+# @cluster-size: Cluster size in bytes (for non-flat extents)
+#
+# @compressed: Whether this extent contains compressed data
+#
+# Since: 8.0
+##
+{ 'struct': 'VmdkExtentInfo',
+ 'data': {
+ 'filename': 'str',
+ 'format': 'str',
+ 'virtual-size': 'int',
+ '*cluster-size': 'int',
+ '*compressed': 'bool'
} }
##
@@ -5754,3 +5780,13 @@
'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
'returns': 'SnapshotInfo',
'allow-preconfig': true }
+
+##
+# @DummyBlockCoreForceArrays:
+#
+# Not used by QMP; hack to let us use ImageInfoList internally
+#
+# Since: 8.0
+##
+{ 'struct': 'DummyBlockCoreForceArrays',
+ 'data': { 'unused-image-info': ['ImageInfo'] } }
--
2.31.1

@ -0,0 +1,55 @@
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit ef56ffbdd6b0605dc1e305611287b948c970e236
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:08 2023 -0400
checkpatch: add qemu_bh_new/aio_bh_new checks
Advise authors to use the _guarded versions of the APIs, instead.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
scripts/checkpatch.pl | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index d768171dcf..eeaec436eb 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2865,6 +2865,14 @@ sub process {
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
}
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
+# recommend aio_bh_new_guarded instead of aio_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
# check for module_init(), use category-specific init macros explicitly please
if ($line =~ /^module_init\s*\(/) {
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
--
2.39.3

@ -1,127 +0,0 @@
From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 21 Feb 2023 16:22:17 -0500
Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
race
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
RH-Bugzilla: 2155748
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm)
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
means the rest of dma_blk_cb() is not protected. In particular, the
DMAAIOCB field accesses happen outside the lock.
There is a race when the main loop thread holds the AioContext lock and
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
field determines how cancellation proceeds. If dma_aio_cancel() sees
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
completed twice (-ECANCELED and the actual return value).
The following assertion can occur with virtio-scsi when an IOThread is
used:
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
Fix the race by holding the AioContext across dma_blk_cb(). Now
dma_aio_cancel() under the AioContext lock will not see
inconsistent/intermediate states.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/scsi-disk.c | 4 +---
softmmu/dma-helpers.c | 12 +++++++-----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 5327f93f4c..b12d8b0816 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -354,13 +354,12 @@ done:
scsi_req_unref(&r->req);
}
+/* Called with AioContext lock held */
static void scsi_dma_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7820fec54c..2463964805 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+ AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
+ aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- return;
+ goto out;
}
dma_blk_unmap(dbs);
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- return;
+ goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
- aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
- aio_context_release(dbs->ctx);
assert(dbs->acb);
+out:
+ aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
--
2.39.1

@ -1,61 +0,0 @@
From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 07/12] edu: add smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:16:13 2023 +0100
edu: add smp_mb__after_rmw()
Ensure ordering between clearing the COMPUTING flag and checking
IRQFACT, and between setting the IRQFACT flag and checking
COMPUTING. This ensures that no wakeups are lost.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
hw/misc/edu.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index e935c418d4..a1f8bc77e7 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
case 0x20:
if (val & EDU_STATUS_IRQFACT) {
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
+ smp_mb__after_rmw();
} else {
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
}
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
qemu_mutex_unlock(&edu->thr_mutex);
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
+ /* Clear COMPUTING flag before checking IRQFACT. */
+ smp_mb__after_rmw();
+
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
qemu_mutex_lock_iothread();
edu_raise_irq(edu, FACT_IRQ);
--
2.39.1

@ -0,0 +1,153 @@
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 17 May 2023 17:28:32 +0200
Subject: [PATCH 02/21] graph-lock: Disable locking for now
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
RH-Bugzilla: 2186725
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
come from callers that hold an AioContext lock, which is not allowed
during polling. In theory, we could temporarily release the lock, but
callers are inconsistent about whether they hold a lock, and if they do,
some are also confused about which one they hold. While all of this is
fixable, it's not trivial, and the best course of action for 8.0.1 is
probably just disabling the graph locking code temporarily.
We don't currently rely on graph locking yet. It is supposed to replace
the AioContext lock eventually to enable multiqueue support, but as long
as we still have the AioContext lock, it is sufficient without the graph
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
more either and this commit can be reverted. (Of course, it can also be
reverted while the AioContext lock still exists if the callers have been
fixed.)
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 259a7a0bde..2490926c90 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(void)
{
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
+#if 0
assert(!qatomic_read(&has_writer));
/* Make sure that constantly arriving new I/O doesn't cause starvation */
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
} while (reader_count() >= 1);
bdrv_drain_all_end();
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}
--
2.39.3

@ -0,0 +1,40 @@
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Tue, 2 May 2023 15:51:53 +0530
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
version 7.6 and above
RH-Author: Ani Sinha <None>
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
RH-Bugzilla: 1934134
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
Please look at QEMU upstream commit
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
This patch adapts the above change so that it applies to RHEL pc machines of
version 7.6 and newer. These are the machine types that are currently supported
in RHEL. Q35 machines are not affected.
Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
hw/i386/pc_piix.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4d5880e249..6c7be628e1 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
pcmc->default_nic_model = "e1000";
pcmc->pci_root_uid = 0;
+ pcmc->resizable_acpi_blob = true;
m->default_display = "std";
m->no_parallel = 1;
m->numa_mem_supported = true;
--
2.39.1

@ -0,0 +1,101 @@
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Wed, 29 Mar 2023 10:27:26 +0530
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
older than version 2.3
RH-Author: Ani Sinha <None>
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
RH-Bugzilla: 1934134
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
i440fx machine versions 2.3 and newer supports dynamic ram
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") .
Currently supported all q35 machine types (versions 2.4 and newer) supports
resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table
size exceeds a pre-defined value does not apply to those machine versions.
Add a check limiting the warning message to only those machines that does not
support expandable ram blocks (that is, i440fx machines with version 2.2
and older).
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
---
hw/i386/acpi-build.c | 6 ++++--
hw/i386/pc.c | 1 +
hw/i386/pc_piix.c | 1 +
include/hw/i386/pc.h | 3 +++
4 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..9bc4d8a981 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
int legacy_table_size =
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
ACPI_BUILD_ALIGN_SIZE);
- if (tables_blob->len > legacy_table_size) {
+ if ((tables_blob->len > legacy_table_size) &&
+ !pcmc->resizable_acpi_blob) {
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
g_array_set_size(tables_blob, legacy_table_size);
} else {
/* Make sure we have a buffer in case we need to resize the tables. */
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
+ !pcmc->resizable_acpi_blob) {
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f216922cee..7db5a2348f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
+ pcmc->resizable_acpi_blob = true;
assert(!mc->get_hotplug_handler);
mc->async_pf_vmexit_disable = false;
mc->get_hotplug_handler = pc_get_hotplug_handler;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index fc704d783f..4d5880e249 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
pcmc->rsdp_in_ram = false;
+ pcmc->resizable_acpi_blob = false;
}
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index d218ad1628..2f514d13d8 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -130,6 +130,9 @@ struct PCMachineClass {
/* create kvmclock device even when KVM PV features are not exposed */
bool kvmclock_create_always;
+
+ /* resizable acpi blob compat */
+ bool resizable_acpi_blob;
};
#define TYPE_PC_MACHINE "generic-pc-machine"
--
2.39.1

@ -0,0 +1,60 @@
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'.
Both of them are required to follow cluster-NUMA-node boundary. To
enable the validation to warn about the irregular configuration where
multiple CPUs in one cluster have been associated with different NUMA
nodes.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/sbsa-ref.c | 2 ++
hw/arm/virt.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 0b93558dde..efb380e7c8 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
}
static const TypeInfo sbsa_ref_info = {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9be53e9355..df6a0231bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->smp_props.clusters_supported = true;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
mc->default_ram_id = "mach-virt.ram";
object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3

@ -0,0 +1,166 @@
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 25 Jul 2023 10:56:51 +0100
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
RH-Bugzilla: 2229133
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
The implementation of the SMMUv3 has multiple places where it reads a
data structure from the guest and directly operates on it without
doing a guest-to-host endianness conversion. Since all SMMU data
structures are little-endian, this means that the SMMU doesn't work
on a big-endian host. In particular, this causes the Avocado test
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
to fail on an s390x host.
Add appropriate byte-swapping on reads and writes of guest in-memory
data structures so that the device works correctly on big-endian
hosts.
As part of this we constrain queue_read() to operate only on Cmd
structs and queue_write() on Evt structs, because in practice these
are the only data structures the two functions are used with, and we
need to know what the data structure is to be able to byte-swap its
parts correctly.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/smmu-common.c | 3 +--
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
2 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index e7f1c1f219..daa02ce798 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
dma_addr_t addr = baseaddr + index * sizeof(*pte);
/* TODO: guarantee 64-bit single-copy atomicity */
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
- MEMTXATTRS_UNSPECIFIED);
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
if (ret != MEMTX_OK) {
info->type = SMMU_PTW_ERR_WALK_EABT;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 270c80b665..cfb56725a6 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
}
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
{
dma_addr_t addr = Q_CONS_ENTRY(q);
+ MemTxResult ret;
+ int i;
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
- MEMTXATTRS_UNSPECIFIED);
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
+ MEMTXATTRS_UNSPECIFIED);
+ if (ret != MEMTX_OK) {
+ return ret;
+ }
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
+ le32_to_cpus(&cmd->word[i]);
+ }
+ return ret;
}
-static MemTxResult queue_write(SMMUQueue *q, void *data)
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
{
dma_addr_t addr = Q_PROD_ENTRY(q);
MemTxResult ret;
+ Evt evt = *evt_in;
+ int i;
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
+ cpu_to_le32s(&evt.word[i]);
+ }
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
MEMTXATTRS_UNSPECIFIED);
if (ret != MEMTX_OK) {
return ret;
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
SMMUEventInfo *event)
{
- int ret;
+ int ret, i;
trace_smmuv3_get_ste(addr);
/* TODO: guarantee 64-bit single-copy atomicity */
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
event->u.f_ste_fetch.addr = addr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+ le32_to_cpus(&buf->word[i]);
+ }
return 0;
}
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
CD *buf, SMMUEventInfo *event)
{
dma_addr_t addr = STE_CTXPTR(ste);
- int ret;
+ int ret, i;
trace_smmuv3_get_cd(addr);
/* TODO: guarantee 64-bit single-copy atomicity */
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
event->u.f_ste_fetch.addr = addr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+ le32_to_cpus(&buf->word[i]);
+ }
return 0;
}
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
return -EINVAL;
}
if (s->features & SMMU_FEATURE_2LVL_STE) {
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
dma_addr_t l1ptr, l2ptr;
STEDesc l1std;
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
event->u.f_ste_fetch.addr = l1ptr;
return -EINVAL;
}
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
+ le32_to_cpus(&l1std.word[i]);
+ }
span = L1STD_SPAN(&l1std);
--
2.39.3

@ -1,169 +0,0 @@
From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
After the improvement to high memory region address assignment is
applied, the memory layout can be changed, introducing possible
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
is disabled or enabled when the optimization is applied or not, with
the following configuration. The configuration is only achievable by
modifying the source code until more properties are added to allow
users selectively disable those high memory regions.
pa_bits = 40;
vms->highmem_redists = false;
vms->highmem_ecam = false;
vms->highmem_mmio = true;
# qemu-system-aarch64 -accel kvm -cpu host \
-machine virt-7.2,compact-highmem={on, off} \
-m 4G,maxmem=511G -monitor stdio
Region compact-highmem=off compact-highmem=on
----------------------------------------------------------------
MEM [1GB 512GB] [1GB 512GB]
HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled]
HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled]
HIGH_PCIE_MMIO [disabled] [512GB 1TB]
In order to keep backwords compatibility, we need to disable the
optimization on machine, which is virt-7.1 or ealier than it. It
means the optimization is enabled by default from virt-7.2. Besides,
'compact-highmem' property is added so that the optimization can be
explicitly enabled or disabled on all machine types by users.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-7-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0)
Signed-off-by: Gavin Shan <gshan@redhat.com>
Conflicts:
hw/arm/virt.c
Comment out the handlers of property 'compact-highmem' since
the property isn't exposed.
---
docs/system/arm/virt.rst | 4 ++++
hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++
include/hw/arm/virt.h | 1 +
3 files changed, 39 insertions(+)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 20442ea2c1..4454706392 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -94,6 +94,10 @@ highmem
address space above 32 bits. The default is ``on`` for machine types
later than ``virt-2.12``.
+compact-highmem
+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
+ The default is ``on`` for machine types later than ``virt-7.2``.
+
gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6896e0ca0f..6087511ae9 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = {
* Note the extended_memmap is sized so that it eventually also includes the
* base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
* index of base_memmap).
+ *
+ * The memory map for these Highmem IO Regions can be in legacy or compact
+ * layout, depending on 'compact-highmem' property. With legacy layout, the
+ * PA space for one specific region is always reserved, even if the region
+ * has been disabled or doesn't fit into the PA space. However, the PA space
+ * for the region won't be reserved in these circumstances with compact layout.
*/
static MemMapEntry extended_memmap[] = {
/* Additional 64 MB redist region (can contain up to 512 redistributors) */
@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
+static bool virt_get_compact_highmem(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_compact;
+}
+
+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_compact = value;
+}
+#endif /* disabled for RHEL */
+
static bool virt_get_its(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable using "
"physical address space above 32 bits");
+ object_class_property_add_bool(oc, "compact-highmem",
+ virt_get_compact_highmem,
+ virt_set_compact_highmem);
+ object_class_property_set_description(oc, "compact-highmem",
+ "Set on/off to enable/disable compact "
+ "layout for high memory regions");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj)
/* High memory is enabled by default */
vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
vms->highmem_ecam = !vmc->no_highmem_ecam;
@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2)
static void virt_machine_7_1_options(MachineClass *mc)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
virt_machine_7_2_options(mc);
compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
+ /* Compact layout for high memory regions was introduced with 7.2 */
+ vmc->no_highmem_compact = true;
}
DEFINE_VIRT_MACHINE(7, 1)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 15bd291311..85e7d61868 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -125,6 +125,7 @@ struct VirtMachineClass {
bool no_pmu;
bool claim_edge_triggered_timers;
bool smbios_old_sys_ver;
+ bool no_highmem_compact;
bool no_highmem_ecam;
bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */
bool kvm_no_adjvtime;
--
2.31.1

@ -1,179 +0,0 @@
From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory
regions
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
The 3 high memory regions are usually enabled by default, but they may
be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2.
This leads to waste in the PA space.
Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to
allow users selectively disable them if needed. After that, the high
memory region for GICv3 or GICv4 redistributor can be disabled by user,
the number of maximal supported CPUs needs to be calculated based on
'vms->highmem_redists'. The follow-up error message is also improved
to indicate if the high memory region for GICv3 and GICv4 has been
enabled or not.
Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20221029224307.138822-8-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368)
Signed-off-by: Gavin Shan <gshan@redhat.com>
Conflicts:
hw/arm/virt.c
Comment out the handlers of the property 'highmem-redists',
'highmem-ecam' and 'highmem-mmio' since they aren't exposed.
---
docs/system/arm/virt.rst | 13 +++++++
hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 4454706392..188a4f211f 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -98,6 +98,19 @@ compact-highmem
Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
The default is ``on`` for machine types later than ``virt-7.2``.
+highmem-redists
+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or
+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will
+ limit the maximum number of CPUs when GICv3 or GICv4 is used.
+
+highmem-ecam
+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM.
+ The default is ``on`` for machine types later than ``virt-3.0``.
+
+highmem-mmio
+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
+ The default is ``on``.
+
gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6087511ae9..304fa0d6e7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine)
if (vms->gic_version == VIRT_GIC_VERSION_2) {
virt_max_cpus = GIC_NCPU;
} else {
- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) +
- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
+ if (vms->highmem_redists) {
+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
+ }
}
if (max_cpus > virt_max_cpus) {
error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
"supported by machine 'mach-virt' (%d)",
max_cpus, virt_max_cpus);
+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
+ error_printf("Try 'highmem-redists=on' for more CPUs\n");
+ }
+
exit(1);
}
@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
vms->highmem_compact = value;
}
+
+static bool virt_get_highmem_redists(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_redists;
+}
+
+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_redists = value;
+}
+
+static bool virt_get_highmem_ecam(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_ecam;
+}
+
+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_ecam = value;
+}
+
+static bool virt_get_highmem_mmio(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_mmio;
+}
+
+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_mmio = value;
+}
+
#endif /* disabled for RHEL */
static bool virt_get_its(Object *obj, Error **errp)
@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable compact "
"layout for high memory regions");
+ object_class_property_add_bool(oc, "highmem-redists",
+ virt_get_highmem_redists,
+ virt_set_highmem_redists);
+ object_class_property_set_description(oc, "highmem-redists",
+ "Set on/off to enable/disable high "
+ "memory region for GICv3 or GICv4 "
+ "redistributor");
+
+ object_class_property_add_bool(oc, "highmem-ecam",
+ virt_get_highmem_ecam,
+ virt_set_highmem_ecam);
+ object_class_property_set_description(oc, "highmem-ecam",
+ "Set on/off to enable/disable high "
+ "memory region for PCI ECAM");
+
+ object_class_property_add_bool(oc, "highmem-mmio",
+ virt_get_highmem_mmio,
+ virt_set_highmem_mmio);
+ object_class_property_set_description(oc, "highmem-mmio",
+ "Set on/off to enable/disable high "
+ "memory region for PCI MMIO");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
--
2.31.1

@ -1,51 +0,0 @@
From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address
assignment for 9.2.0 machine
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
Upstream: RHEL only
The compact high memory region address assignment is enabled for 9.2.0,
but it's kept as disabled for 9.0.0, to keep the backwards compatibility
on 9.0.0. Note that these newly added properties ('compact-highmem',
'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream
aren't exposed for the downstream.
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 304fa0d6e7..e41c0b462c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj)
/* High memory is enabled by default */
vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
vms->highmem_ecam = !vmc->no_highmem_ecam;
@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc)
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
vmc->no_tcg_lpa2 = true;
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
+ vmc->no_highmem_compact = true;
}
DEFINE_RHEL_MACHINE(9, 0, 0)
--
2.31.1

@ -1,112 +0,0 @@
From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address
assignment
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
There are three high memory regions, which are VIRT_HIGH_REDIST2,
VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
are floating on highest RAM address. However, they can be disabled
in several cases.
(1) One specific high memory region is likely to be disabled by
code by toggling vms->highmem_{redists, ecam, mmio}.
(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
'virt-2.12' or ealier than it.
(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
on 32-bits system.
(4) One specific high memory region is disabled when it breaks the
PA space limit.
The current implementation of virt_set_{memmap, high_memmap}() isn't
optimized because the high memory region's PA space is always reserved,
regardless of whatever the actual state in the corresponding
vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and
'vms->highest_gpa' are always increased for case (1), (2) and (3).
It's unnecessary since the assigned PA space for the disabled high
memory region won't be used afterwards.
Improve the address assignment for those three high memory region by
skipping the address assignment for one specific high memory region if
it has been disabled in case (1), (2) and (3). The memory layout may
be changed after the improvement is applied, which leads to potential
migration breakage. So 'vms->highmem_compact' is added to control if
the improvement should be applied. For now, 'vms->highmem_compact' is
set to false, meaning that we don't have memory layout change until it
becomes configurable through property 'compact-highmem' in next patch.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-6-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 15 ++++++++++-----
include/hw/arm/virt.h | 1 +
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6e3b9fc060..6896e0ca0f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms,
vms->memmap[i].size = region_size;
/*
- * Check each device to see if they fit in the PA space,
- * moving highest_gpa as we go.
+ * Check each device to see if it fits in the PA space,
+ * moving highest_gpa as we go. For compatibility, move
+ * highest_gpa for disabled fitting devices as well, if
+ * the compact layout has been disabled.
*
* For each device that doesn't fit, disable it.
*/
fits = (region_base + region_size) <= BIT_ULL(pa_bits);
- if (fits) {
- vms->highest_gpa = region_base + region_size - 1;
+ *region_enabled &= fits;
+ if (vms->highmem_compact && !*region_enabled) {
+ continue;
}
- *region_enabled &= fits;
base = region_base + region_size;
+ if (fits) {
+ vms->highest_gpa = base - 1;
+ }
}
}
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 22b54ec510..15bd291311 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -144,6 +144,7 @@ struct VirtMachineState {
PFlashCFI01 *flash[2];
bool secure;
bool highmem;
+ bool highmem_compact;
bool highmem_ecam;
bool highmem_mmio;
bool highmem_redists;
--
2.31.1

@ -1,82 +0,0 @@
From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in
virt_set_high_memmap()
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces variable 'region_base' for the base address of the
specific high memory region. It's the preparatory work to optimize
high memory region address assignment.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-4-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ca098d40b8..ddcf7ee2f8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
- hwaddr region_size;
+ hwaddr region_base, region_size;
bool fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ region_base = ROUND_UP(base, extended_memmap[i].size);
region_size = extended_memmap[i].size;
- base = ROUND_UP(base, region_size);
- vms->memmap[i].base = base;
+ vms->memmap[i].base = region_base;
vms->memmap[i].size = region_size;
/*
@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
*
* For each device that doesn't fit, disable it.
*/
- fits = (base + region_size) <= BIT_ULL(pa_bits);
+ fits = (region_base + region_size) <= BIT_ULL(pa_bits);
if (fits) {
- vms->highest_gpa = base + region_size - 1;
+ vms->highest_gpa = region_base + region_size - 1;
}
switch (i) {
@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
break;
}
- base += region_size;
+ base = region_base + region_size;
}
}
--
2.31.1

@ -1,95 +0,0 @@
From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled()
helper
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces virt_get_high_memmap_enabled() helper, which returns
the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will
be used in the subsequent patches.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-5-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ddcf7ee2f8..6e3b9fc060 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
return arm_cpu_mp_affinity(idx, clustersz);
}
+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
+ int index)
+{
+ bool *enabled_array[] = {
+ &vms->highmem_redists,
+ &vms->highmem_ecam,
+ &vms->highmem_mmio,
+ };
+
+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST ==
+ ARRAY_SIZE(enabled_array));
+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array));
+
+ return enabled_array[index - VIRT_LOWMEMMAP_LAST];
+}
+
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
hwaddr region_base, region_size;
- bool fits;
+ bool *region_enabled, fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ region_enabled = virt_get_high_memmap_enabled(vms, i);
region_base = ROUND_UP(base, extended_memmap[i].size);
region_size = extended_memmap[i].size;
@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
vms->highest_gpa = region_base + region_size - 1;
}
- switch (i) {
- case VIRT_HIGH_GIC_REDIST2:
- vms->highmem_redists &= fits;
- break;
- case VIRT_HIGH_PCIE_ECAM:
- vms->highmem_ecam &= fits;
- break;
- case VIRT_HIGH_PCIE_MMIO:
- vms->highmem_mmio &= fits;
- break;
- }
-
+ *region_enabled &= fits;
base = region_base + region_size;
}
}
--
2.31.1

@ -1,130 +0,0 @@
From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces virt_set_high_memmap() helper. The logic of high
memory region address assignment is moved to the helper. The intention
is to make the subsequent optimization for high memory region address
assignment easier.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-2-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 74 ++++++++++++++++++++++++++++-----------------------
1 file changed, 41 insertions(+), 33 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bf18838b87..bea5f54720 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
return arm_cpu_mp_affinity(idx, clustersz);
}
+static void virt_set_high_memmap(VirtMachineState *vms,
+ hwaddr base, int pa_bits)
+{
+ int i;
+
+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ hwaddr size = extended_memmap[i].size;
+ bool fits;
+
+ base = ROUND_UP(base, size);
+ vms->memmap[i].base = base;
+ vms->memmap[i].size = size;
+
+ /*
+ * Check each device to see if they fit in the PA space,
+ * moving highest_gpa as we go.
+ *
+ * For each device that doesn't fit, disable it.
+ */
+ fits = (base + size) <= BIT_ULL(pa_bits);
+ if (fits) {
+ vms->highest_gpa = base + size - 1;
+ }
+
+ switch (i) {
+ case VIRT_HIGH_GIC_REDIST2:
+ vms->highmem_redists &= fits;
+ break;
+ case VIRT_HIGH_PCIE_ECAM:
+ vms->highmem_ecam &= fits;
+ break;
+ case VIRT_HIGH_PCIE_MMIO:
+ vms->highmem_mmio &= fits;
+ break;
+ }
+
+ base += size;
+ }
+}
+
static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
{
MachineState *ms = MACHINE(vms);
@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
/* We know for sure that at least the memory fits in the PA space */
vms->highest_gpa = memtop - 1;
- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
- hwaddr size = extended_memmap[i].size;
- bool fits;
-
- base = ROUND_UP(base, size);
- vms->memmap[i].base = base;
- vms->memmap[i].size = size;
-
- /*
- * Check each device to see if they fit in the PA space,
- * moving highest_gpa as we go.
- *
- * For each device that doesn't fit, disable it.
- */
- fits = (base + size) <= BIT_ULL(pa_bits);
- if (fits) {
- vms->highest_gpa = base + size - 1;
- }
-
- switch (i) {
- case VIRT_HIGH_GIC_REDIST2:
- vms->highmem_redists &= fits;
- break;
- case VIRT_HIGH_PCIE_ECAM:
- vms->highmem_ecam &= fits;
- break;
- case VIRT_HIGH_PCIE_MMIO:
- vms->highmem_mmio &= fits;
- break;
- }
-
- base += size;
- }
+ virt_set_high_memmap(vms, base, pa_bits);
if (device_memory_size > 0) {
ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
--
2.31.1

@ -1,83 +0,0 @@
From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in
virt_set_high_memmap()
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This renames variable 'size' to 'region_size' in virt_set_high_memmap().
Its counterpart ('region_base') will be introduced in next patch.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-3-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bea5f54720..ca098d40b8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
+ hwaddr region_size;
+ bool fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
- hwaddr size = extended_memmap[i].size;
- bool fits;
+ region_size = extended_memmap[i].size;
- base = ROUND_UP(base, size);
+ base = ROUND_UP(base, region_size);
vms->memmap[i].base = base;
- vms->memmap[i].size = size;
+ vms->memmap[i].size = region_size;
/*
* Check each device to see if they fit in the PA space,
@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
*
* For each device that doesn't fit, disable it.
*/
- fits = (base + size) <= BIT_ULL(pa_bits);
+ fits = (base + region_size) <= BIT_ULL(pa_bits);
if (fits) {
- vms->highest_gpa = base + size - 1;
+ vms->highest_gpa = base + region_size - 1;
}
switch (i) {
@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
break;
}
- base += size;
+ base += region_size;
}
}
--
2.31.1

@ -0,0 +1,41 @@
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
RHEL machines
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
Upstream Status: RHEL only
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
A warning message will be printed if the boundary is broken.
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index df6a0231bc..faf68488d5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
mc->smp_props.clusters_supported = true;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
+ /* platform instead of architectural choice */
+ mc->cpu_cluster_has_numa_boundary = true;
mc->default_ram_id = "mach-virt.ram";
object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3

@ -0,0 +1,44 @@
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Tue, 25 Jul 2023 15:34:45 -0300
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
<= pc-q35-rhel9.2.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
RH-Bugzilla: 2223691
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
This is a downstream-only patch to that sets off the property
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
live migrations to RHEL9.2 happen successfully.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
type < 8.0")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
hw/core/machine.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5ea52317b9..6f5117669d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
{ "virtio-mem", "x-early-migration", "false" },
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
{ "migration", "x-preempt-pre-7-2", "true" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
};
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
--
2.39.3

@ -0,0 +1,118 @@
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Tue, 2 May 2023 21:27:02 -0300
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
type < 8.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
RH-Bugzilla: 2189423
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
set for machine types < 8.0 will cause migration to fail if the target
QEMU version is < 8.0.0 :
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
qemu-system-x86_64: Failed to load PCIDevice:config
qemu-system-x86_64: Failed to load e1000e:parent_obj
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
qemu-system-x86_64: load of migration failed: Invalid argument
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
with this cmdline:
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
In order to fix this, property x-pcie-err-unc-mask was introduced to
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
default, but is disabled if machine type <= 7.2.
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
hw/core/machine.c | 1 +
hw/pci/pci.c | 2 ++
hw/pci/pcie_aer.c | 11 +++++++----
include/hw/pci/pci.h | 2 ++
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0e0120b7f2..c28702b690 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
{ "e1000e", "migrate-timadj", "off" },
{ "virtio-mem", "x-early-migration", "false" },
{ "migration", "x-preempt-pre-7-2", "true" },
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index def5000e7b..8ad4349e96 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -79,6 +79,8 @@ static Property pci_props[] = {
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
failover_pair_id),
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 103667c368..374d593ead 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
PCI_ERR_UNC_SUPPORTED);
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
- PCI_ERR_UNC_MASK_DEFAULT);
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
- PCI_ERR_UNC_SUPPORTED);
+
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_MASK_DEFAULT);
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_SUPPORTED);
+ }
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
PCI_ERR_UNC_SEVERITY_DEFAULT);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d5a40cd058..6dc6742fc4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -207,6 +207,8 @@ enum {
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
#define QEMU_PCIE_CXL_BITNR 10
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
};
typedef struct PCIINTxRoute {
--
2.39.3

@ -0,0 +1,470 @@
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
qemu_bh_new_guarded
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:09 2023 -0400
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
This protects devices from bh->mmio reentrancy issues.
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/9pfs/xen-9p-backend.c | 5 ++++-
hw/block/dataplane/virtio-blk.c | 3 ++-
hw/block/dataplane/xen-block.c | 5 +++--
hw/char/virtio-serial-bus.c | 3 ++-
hw/display/qxl.c | 9 ++++++---
hw/display/virtio-gpu.c | 6 ++++--
hw/ide/ahci.c | 3 ++-
hw/ide/ahci_internal.h | 1 +
hw/ide/core.c | 4 +++-
hw/misc/imx_rngc.c | 6 ++++--
hw/misc/macio/mac_dbdma.c | 2 +-
hw/net/virtio-net.c | 3 ++-
hw/nvme/ctrl.c | 6 ++++--
hw/scsi/mptsas.c | 3 ++-
hw/scsi/scsi-bus.c | 3 ++-
hw/scsi/vmw_pvscsi.c | 3 ++-
hw/usb/dev-uas.c | 3 ++-
hw/usb/hcd-dwc2.c | 3 ++-
hw/usb/hcd-ehci.c | 3 ++-
hw/usb/hcd-uhci.c | 2 +-
hw/usb/host-libusb.c | 6 ++++--
hw/usb/redirect.c | 6 ++++--
hw/usb/xen-usb.c | 3 ++-
hw/virtio/virtio-balloon.c | 5 +++--
hw/virtio/virtio-crypto.c | 3 ++-
25 files changed, 66 insertions(+), 33 deletions(-)
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 74f3a05f88..0e266c552b 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
int num_rings;
Xen9pfsRing *rings;
+ MemReentrancyGuard mem_reentrancy_guard;
} Xen9pfsDev;
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
XEN_FLEX_RING_SIZE(ring_order);
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
+ &xen_9pdev->rings[i],
+ &xen_9pdev->mem_reentrancy_guard);
xen_9pdev->rings[i].out_cons = 0;
xen_9pdev->rings[i].out_size = 0;
xen_9pdev->rings[i].inprogress = false;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index b28d81737e..a6202997ee 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
} else {
s->ctx = qemu_get_aio_context();
}
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
+ &DEVICE(vdev)->mem_reentrancy_guard);
s->batch_notify_vqs = bitmap_new(conf->num_queues);
*dataplane = s;
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 734da42ea7..d8bc39d359 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
} else {
dataplane->ctx = qemu_get_aio_context();
}
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
- dataplane);
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
+ dataplane,
+ &DEVICE(xendev)->mem_reentrancy_guard);
return dataplane;
}
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 7d4601cb5d..dd619f0731 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
return;
}
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
+ &dev->mem_reentrancy_guard);
port->elem = NULL;
}
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 80ce1e9a93..f1c0eb7dfc 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
qxl_reset_state(qxl);
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
+ &DEVICE(qxl)->mem_reentrancy_guard);
}
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 5e15c79b94..66ac9b6cc5 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
g->ctrl_vq = virtio_get_queue(vdev, 0);
g->cursor_vq = virtio_get_queue(vdev, 1);
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
+ &qdev->mem_reentrancy_guard);
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
+ &qdev->mem_reentrancy_guard);
QTAILQ_INIT(&g->reslist);
QTAILQ_INIT(&g->cmdq);
QTAILQ_INIT(&g->fenceq);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 55902e1df7..4e76d6b191 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
ahci_write_fis_d2h(ad);
if (ad->port_regs.cmd_issue && !ad->check_bh) {
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
+ &ad->mem_reentrancy_guard);
qemu_bh_schedule(ad->check_bh);
}
}
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index 303fcd7235..2480455372 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -321,6 +321,7 @@ struct AHCIDevice {
bool init_d2h_sent;
AHCICmdHdr *cur_cmd;
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+ MemReentrancyGuard mem_reentrancy_guard;
};
struct AHCIPCIState {
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 45d14a25e9..de48ff9f86 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
{
IDEState *s = opaque;
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
/* Paired with a decrement in ide_trim_bh_cb() */
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
+ &DEVICE(dev)->mem_reentrancy_guard);
iocb->ret = 0;
iocb->qiov = qiov;
iocb->i = -1;
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
index 632c03779c..082c6980ad 100644
--- a/hw/misc/imx_rngc.c
+++ b/hw/misc/imx_rngc.c
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(sbd, &s->iomem);
sysbus_init_irq(sbd, &s->irq);
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
+ &dev->mem_reentrancy_guard);
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
+ &dev->mem_reentrancy_guard);
}
static void imx_rngc_reset(DeviceState *dev)
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index 43bb1f56ba..80a789f32b 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
{
DBDMAState *s = MAC_DBDMA(dev);
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
}
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 53e1c32643..447f669921 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
n->vqs[index].tx_vq =
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
virtio_net_handle_tx_bh);
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
+ &DEVICE(vdev)->mem_reentrancy_guard);
}
n->vqs[index].tx_waiting = 0;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index ac24eeb5ed..e5a468975e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
}
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
if (n->dbbuf_enabled) {
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
}
}
n->cq[cqid] = cq;
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index c485da792c..3de288b454 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
}
s->max_devices = MPTSAS_NUM_PORTS;
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
+ &DEVICE(dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
}
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index c97176110c..3c20b47ad0 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
AioContext *ctx = blk_get_aio_context(s->conf.blk);
/* The reference is dropped in scsi_dma_restart_bh.*/
object_ref(OBJECT(s));
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
qemu_bh_schedule(s->bh);
}
}
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index fa76696855..4de34536e9 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
}
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
/* override default SCSI bus hotplug-handler, with pvscsi's one */
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 88f99c05d5..f013ded91e 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
QTAILQ_INIT(&uas->results);
QTAILQ_INIT(&uas->requests);
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
+ &d->mem_reentrancy_guard);
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
index 8755e9cbb0..a0c4e782b2 100644
--- a/hw/usb/hcd-dwc2.c
+++ b/hw/usb/hcd-dwc2.c
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
s->fi = USB_FRMINTVL - 1;
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
+ &dev->mem_reentrancy_guard);
sysbus_init_irq(sbd, &s->irq);
}
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index d4da8dcb8d..c930c60921 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
}
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
+ &dev->mem_reentrancy_guard);
s->device = dev;
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 8ac1175ad2..77baaa7a6b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
}
}
- s->bh = qemu_bh_new(uhci_bh, s);
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
s->num_ports_vmstate = NB_PORTS;
QTAILQ_INIT(&s->queues);
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 176868d345..f500db85ab 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
static void usb_host_nodev(USBHostDevice *s)
{
if (!s->bh_nodev) {
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
}
qemu_bh_schedule(s->bh_nodev);
}
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
USBHostDevice *dev = opaque;
if (!dev->bh_postld) {
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
}
qemu_bh_schedule(dev->bh_postld);
dev->bh_postld_pending = true;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index fd7df599bc..39fbaaab16 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
}
}
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 66cb3f7c24..38ee660a30 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
QTAILQ_INIT(&usbif->req_free_q);
QSIMPLEQ_INIT(&usbif->hotplug_q);
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
+ &DEVICE(xendev)->mem_reentrancy_guard);
}
static int usbback_free(struct XenLegacyDevice *xendev)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 43092aa634..5186e831dd 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
precopy_add_notifier(&s->free_page_hint_notify);
object_ref(OBJECT(s->iothread));
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
- virtio_ballloon_get_free_page_hints, s);
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
+ virtio_ballloon_get_free_page_hints, s,
+ &dev->mem_reentrancy_guard);
}
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 802e1b9659..2fe804510f 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
vcrypto->vqs[i].dataq =
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
vcrypto->vqs[i].dataq_bh =
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
+ &dev->mem_reentrancy_guard);
vcrypto->vqs[i].vcrypto = vcrypto;
}
--
2.39.3

@ -0,0 +1,141 @@
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 29 May 2023 14:21:08 -0400
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
controller (CVE-2023-0330)
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
RH-Jira: RHEL-516
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
Jira: https://issues.redhat.com/browse/RHEL-516
Upstream: Merged
CVE: CVE-2023-2680
commit b987718bbb1d0eabf95499b976212dd5f0120d75
Author: Thomas Huth <thuth@redhat.com>
Date: Mon May 22 11:10:11 2023 +0200
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
We cannot use the generic reentrancy guard in the LSI code, so
we have to manually prevent endless reentrancy here. The problematic
lsi_execute_script() function has already a way to detect whether
too many instructions have been executed - we just have to slightly
change the logic here that it also takes into account if the function
has been called too often in a reentrant way.
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
patch by Mauro Matteo Cascella.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 048436352b..f7d45b0b20 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
uint32_t addr, addr_high;
int opcode;
int insn_processed = 0;
+ static int reentrancy_level;
+
+ reentrancy_level++;
s->istat1 |= LSI_ISTAT1_SRUN;
again:
- if (++insn_processed > LSI_MAX_INSN) {
- /* Some windows drivers make the device spin waiting for a memory
- location to change. If we have been executed a lot of code then
- assume this is the case and force an unexpected device disconnect.
- This is apparently sufficient to beat the drivers into submission.
- */
+ /*
+ * Some windows drivers make the device spin waiting for a memory location
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
+ * assume this is the case and force an unexpected device disconnect. This
+ * is apparently sufficient to beat the drivers into submission.
+ *
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
+ * trigger itself again and again. Avoid this problem by stopping after
+ * being called multiple times in a reentrant way (8 is an arbitrary value
+ * which should be enough for all valid use cases).
+ */
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
if (!(s->sien0 & LSI_SIST0_UDC)) {
qemu_log_mask(LOG_GUEST_ERROR,
"lsi_scsi: inf. loop with UDC masked");
@@ -1596,6 +1605,8 @@ again:
}
}
trace_lsi_execute_script_stop();
+
+ reentrancy_level--;
}
static uint8_t lsi_reg_readb(LSIState *s, int offset)
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
index 2012bd54b7..1b55928b9f 100644
--- a/tests/qtest/fuzz-lsi53c895a-test.c
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
@@ -8,6 +8,36 @@
#include "qemu/osdep.h"
#include "libqtest.h"
+/*
+ * This used to trigger a DMA reentrancy issue
+ * leading to memory corruption bugs like stack
+ * overflow or use-after-free
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
+ */
+static void test_lsi_dma_reentrancy(void)
+{
+ QTestState *s;
+
+ s = qtest_init("-M q35 -m 512M -nodefaults "
+ "-blockdev driver=null-co,node-name=null0 "
+ "-device lsi53c810 -device scsi-cd,drive=null0");
+
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
+ qtest_writel(s, 0xff000000, 0xc0000024);
+ qtest_writel(s, 0xff000114, 0x00000080);
+ qtest_writel(s, 0xff00012c, 0xff000000);
+ qtest_writel(s, 0xff000004, 0xff000114);
+ qtest_writel(s, 0xff000008, 0xff100014);
+ qtest_writel(s, 0xff10002f, 0x000000ff);
+
+ qtest_quit(s);
+}
+
/*
* This used to trigger a UAF in lsi_do_msgout()
* https://gitlab.com/qemu-project/qemu/-/issues/972
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
test_lsi_do_msgout_cancel_req);
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
+ test_lsi_dma_reentrancy);
+
return g_test_run();
}
--
2.39.3

@ -1,59 +0,0 @@
From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001
From: Julia Suvorova <jusual@redhat.com>
Date: Thu, 23 Feb 2023 13:57:47 +0100
Subject: [PATCH] hw/smbios: fix field corruption in type 4 table
RH-Author: Julia Suvorova <None>
RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table
RH-Bugzilla: 2169904
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: Ani Sinha <None>
RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec
Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the
strings which follow immediately after the struct fields have been
overwritten by unconditional filling of later fields such as core_count2.
Make these fields dependent on the SMBIOS version.
Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4")
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904
Signed-off-by: Julia Suvorova <jusual@redhat.com>
Message-Id: <20230223125747.254914-1-jusual@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b)
---
hw/smbios/smbios.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index c5ad69237e..2d2ece3edb 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
t->core_enabled = t->core_count;
- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
-
t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
- t->thread_count2 = cpu_to_le16(ms->smp.threads);
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
t->processor_family2 = cpu_to_le16(0x01); /* Other */
+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) {
+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
+ t->thread_count2 = cpu_to_le16(ms->smp.threads);
+ }
+
SMBIOS_BUILD_TABLE_POST;
smbios_type4_count++;
}
--
2.31.1

@ -0,0 +1,76 @@
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Wed, 12 Jul 2023 17:46:57 +0200
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 179: vfio: live migration support
RH-Bugzilla: 2192818
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
Bugzilla: https://bugzilla.redhat.com/2192818
commit 0ddcb39c9357
Author: Alex Williamson <alex.williamson@redhat.com>
Date: Fri Jun 30 16:36:08 2023 -0600
hw/vfio/pci-quirks: Sanitize capability pointer
Coverity reports a tained scalar when traversing the capabilities
chain (CID 1516589). In practice I've never seen a device with a
chain so broken as to cause an issue, but it's also pretty easy to
sanitize.
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci-quirks.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 0ed2fcd531..f4ff836805 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
.set = set_nv_gpudirect_clique_id,
};
+static bool is_valid_std_cap_offset(uint8_t pos)
+{
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
+}
+
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
*/
ret = pread(vdev->vbasedev.fd, &tmp, 1,
vdev->config_offset + PCI_CAPABILITY_LIST);
- if (ret != 1 || !tmp) {
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
return -EINVAL;
}
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
d4_conflict = true;
}
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
- } while (tmp);
+ } while (is_valid_std_cap_offset(tmp));
if (!c8_conflict) {
pos = 0xC8;
--
2.39.3

@ -0,0 +1,110 @@
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Wed, 12 Jul 2023 17:46:57 +0200
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
GPUDirect Cliques
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 179: vfio: live migration support
RH-Bugzilla: 2192818
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
Bugzilla: https://bugzilla.redhat.com/2192818
commit f6b30c1984f7
Author: Alex Williamson <alex.williamson@redhat.com>
Date: Thu Jun 8 12:05:07 2023 -0600
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
previously reserved for use by hypervisors to implement the GPUDirect
Cliques capability. A revised specification provides an alternate
location. Add a config space walk to the quirk to check for conflicts,
allowing us to fall back to the new location or generate an error at the
quirk setup rather than when the real conflicting capability is added
should there be no available location.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index f0147a050a..0ed2fcd531 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
* +---------------------------------+---------------------------------+
*
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
+ *
+ * Specification for Turning and later GPU architectures:
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
*/
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
const char *name, void *opaque,
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
- int ret, pos = 0xC8;
+ int ret, pos;
+ bool c8_conflict = false, d4_conflict = false;
+ uint8_t tmp;
if (vdev->nv_gpudirect_clique == 0xFF) {
return 0;
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
return -EINVAL;
}
+ /*
+ * Per the updated specification above, it's recommended to use offset
+ * D4h for Turing and later GPU architectures due to a conflict of the
+ * MSI-X capability at C8h. We don't know how to determine the GPU
+ * architecture, instead we walk the capability chain to mark conflicts
+ * and choose one or error based on the result.
+ *
+ * NB. Cap list head in pdev->config is already cleared, read from device.
+ */
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
+ vdev->config_offset + PCI_CAPABILITY_LIST);
+ if (ret != 1 || !tmp) {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
+ return -EINVAL;
+ }
+
+ do {
+ if (tmp == 0xC8) {
+ c8_conflict = true;
+ } else if (tmp == 0xD4) {
+ d4_conflict = true;
+ }
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
+ } while (tmp);
+
+ if (!c8_conflict) {
+ pos = 0xC8;
+ } else if (!d4_conflict) {
+ pos = 0xD4;
+ } else {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
+ return -EINVAL;
+ }
+
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
if (ret < 0) {
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
--
2.39.3

@ -0,0 +1,62 @@
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 17 Jul 2023 18:21:26 +0200
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
virtio_iommu_handle_command()
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
RH-Bugzilla: 2229133
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
In the virtio_iommu_handle_command() when a PROBE request is handled,
output_size takes a value greater than the tail size and on a subsequent
iteration we can get a stack out-of-band access. Initialize the
output_size on each iteration.
The issue was found with ASAN. Credits to:
Yiming Tao(Zhejiang University)
Gaoning Pan(Zhejiang University)
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/virtio/virtio-iommu.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 421e2a944f..17ce630200 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
struct virtio_iommu_req_head head;
struct virtio_iommu_req_tail tail = {};
- size_t output_size = sizeof(tail), sz;
VirtQueueElement *elem;
unsigned int iov_cnt;
struct iovec *iov;
void *buf = NULL;
+ size_t sz;
for (;;) {
+ size_t output_size = sizeof(tail);
+
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
return;
--
2.39.3

@ -0,0 +1,52 @@
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 14:29:15 -0400
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
0x8000001F is set
RH-Author: Bandan Das <None>
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214839
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:30 2022 -0500
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
associated with fields being set.
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 839706b430..4ac3046313 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (sev_enabled()) {
*eax = 0x2;
*eax |= sev_es_enabled() ? 0x8 : 0;
- *ebx = sev_get_cbit_position();
- *ebx |= sev_get_reduced_phys_bits() << 6;
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
}
break;
default:
--
2.39.3

@ -0,0 +1,77 @@
From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 14:22:55 -0400
Subject: [PATCH 12/14] i386/sev: Update checks and information related to
reduced-phys-bits
RH-Author: Bandan Das <None>
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214839
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:29 2022 -0500
i386/sev: Update checks and information related to reduced-phys-bits
The value of the reduced-phys-bits parameter is propogated to the CPUID
information exposed to the guest. Update the current validation check to
account for the size of the CPUID field (6-bits), ensuring the value is
in the range of 1 to 63.
Maintain backward compatibility, to an extent, by allowing a value greater
than 1 (so that the previously documented value of 5 still works), but not
allowing anything over 63.
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/sev.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 859e06f6ad..fe2144c038 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
host_cbitpos = ebx & 0x3f;
+ /*
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
+ * register of CPUID 0x8000001F. No need to verify the range as the
+ * comparison against the host value accomplishes that.
+ */
if (host_cbitpos != sev->cbitpos) {
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
__func__, host_cbitpos, sev->cbitpos);
goto err;
}
- if (sev->reduced_phys_bits < 1) {
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
- " requested '%d'", __func__, sev->reduced_phys_bits);
+ /*
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
+ * is in the range of 1 to 63.
+ */
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
+ " it should be in the range of 1 to 63, requested '%d'",
+ __func__, sev->reduced_phys_bits);
goto err;
}
--
2.39.3

@ -1,64 +0,0 @@
From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 23 Feb 2023 14:59:21 +0800
Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode
RH-Bugzilla: 2156876
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos)
Without dt mode, device IOTLB notifier won't work since guest won't
send device IOTLB invalidation descriptor in this case. Let's fail
early instead of misbehaving silently.
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Viktor Prutyanov <viktor@daynix.com>
Buglink: https://bugzilla.redhat.com/2156876
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20230223065924.42503-3-jasowang@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3)
Conflict in hw/i386/intel_iommu.c because of missing commit:
4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode")
---
hw/i386/intel_iommu.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a08ee85edf..d2983f40d3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
/* TODO: add support for VFIO and vhost users */
if (s->snoop_control) {
@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
"Snoop Control with vhost or VFIO is not supported");
return -ENOTSUP;
}
+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
+ error_setg_errno(errp, ENOTSUP,
+ "device %02x.%02x.%x requires device IOTLB mode",
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ return -ENOTSUP;
+ }
/* Update per-address-space notifier flags */
vtd_as->notifier_flags = new;
--
2.39.1

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save