forked from rpms/qemu-kvm
parent
3b49081d25
commit
e8002e581b
@ -1 +1 @@
|
||||
SOURCES/qemu-8.0.0.tar.xz
|
||||
SOURCES/qemu-8.2.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz
|
||||
1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz
|
||||
|
@ -0,0 +1,44 @@
|
||||
From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Thu, 14 Dec 2023 04:42:01 -0500
|
||||
Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-17168
|
||||
|
||||
Adding new machine type to support enabling new features.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index c541efee5e..0b17c94ad7 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void)
|
||||
}
|
||||
type_init(rhel_machine_init);
|
||||
|
||||
+static void rhel940_virt_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
+
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ rhel940_virt_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
+DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
static void rhel900_virt_options(MachineClass *mc)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
|
||||
From: Kfir Manor <kfir@daynix.com>
|
||||
Date: Sun, 22 Jan 2023 17:33:07 +0200
|
||||
Subject: qga/linux: add usb support to guest-get-fsinfo
|
||||
|
||||
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
|
||||
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
|
||||
RH-Bugzilla: 2149191
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: yvugenfi <None>
|
||||
RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191
|
||||
Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/
|
||||
|
||||
Signed-off-by: Kfir Manor <kfir@daynix.com>
|
||||
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
|
||||
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
|
||||
Patch-id: 72
|
||||
Patch-present-in-specfile: True
|
||||
---
|
||||
qga/commands-posix.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
|
||||
index 079689d79a..97754930c1 100644
|
||||
--- a/qga/commands-posix.c
|
||||
+++ b/qga/commands-posix.c
|
||||
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
g_str_equal(driver, "sym53c8xx") ||
|
||||
g_str_equal(driver, "virtio-pci") ||
|
||||
g_str_equal(driver, "ahci") ||
|
||||
- g_str_equal(driver, "nvme"))) {
|
||||
+ g_str_equal(driver, "nvme") ||
|
||||
+ g_str_equal(driver, "xhci_hcd") ||
|
||||
+ g_str_equal(driver, "ehci-pci"))) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
}
|
||||
} else if (strcmp(driver, "nvme") == 0) {
|
||||
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
|
||||
+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
|
||||
+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
|
||||
} else {
|
||||
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
|
||||
goto cleanup;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,110 +0,0 @@
|
||||
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 15 Feb 2023 02:03:17 -0500
|
||||
Subject: Add RHEL 9.2.0 compat structure
|
||||
|
||||
Adding compatibility bits necessary to keep 9.2.0 machine
|
||||
types same after rebase to 8.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
Rebase notes (8.0.0 rc4):
|
||||
- Added migration.x-preempt-pre-7-2 compat
|
||||
---
|
||||
hw/arm/virt.c | 1 +
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 20 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 1ae1654be5..9be53e9355 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5aa567fad3..0e0120b7f2 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "e1000e", "migrate-timadj", "off" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "virtio-mem", "x-early-migration", "false" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "migration", "x-preempt-pre-7-2", "true" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
+
|
||||
/*
|
||||
* Mostly the same as hw_compat_7_0
|
||||
*/
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 3e330fd36f..90fb6e2e03 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 98601bb76f..8945b69175 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.2.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index dcd3b966b0..6a0b93c63d 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 5e7446ee40..5f08bd7550 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_2[];
|
||||
+extern const size_t hw_compat_rhel_9_2_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_1[];
|
||||
extern const size_t hw_compat_rhel_9_1_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,76 +0,0 @@
|
||||
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 27 Mar 2023 15:14:03 +0200
|
||||
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
|
||||
8.0.0 update
|
||||
|
||||
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 2 ++
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 13 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 8abb1f872e..f216922cee 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
|
||||
};
|
||||
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
|
||||
+GlobalProperty pc_rhel_9_2_compat[] = {
|
||||
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
|
||||
+ { "ICH9-LPC", "noreboot", "true" },
|
||||
+};
|
||||
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
|
||||
+
|
||||
GlobalProperty pc_rhel_9_0_compat[] = {
|
||||
/* pc_rhel_9_0_compat from pc_compat_6_2 */
|
||||
{ "virtio-mem", "unplugged-inaccessible", "off" },
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 90fb6e2e03..fc704d783f 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 8945b69175..e97655616a 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 4376f64a47..d218ad1628 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
|
||||
extern GlobalProperty pc_rhel_compat[];
|
||||
extern const size_t pc_rhel_compat_len;
|
||||
|
||||
+extern GlobalProperty pc_rhel_9_2_compat[];
|
||||
+extern const size_t pc_rhel_9_2_compat_len;
|
||||
+
|
||||
extern GlobalProperty pc_rhel_9_0_compat[];
|
||||
extern const size_t pc_rhel_9_0_compat_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,83 +0,0 @@
|
||||
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Mon, 17 Apr 2023 01:24:18 -0400
|
||||
Subject: Disable unwanted new devices
|
||||
|
||||
QEMU 8.0 adds two new devices we do not want to support that can't
|
||||
be disabled using configure switch.
|
||||
|
||||
1) ide-cf - virtual CompactFlash card
|
||||
|
||||
2) i2c-echo - testing echo device
|
||||
|
||||
Use manual disabling of the device by changing code (1) and meson configs (2).
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/ide/qdev.c | 9 +++++++++
|
||||
hw/misc/meson.build | 3 ++-
|
||||
2 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
|
||||
index 1b3b4da01d..454bfa5783 100644
|
||||
--- a/hw/ide/qdev.c
|
||||
+++ b/hw/ide/qdev.c
|
||||
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
|
||||
ide_dev_initfn(dev, IDE_CD, errp);
|
||||
}
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static void ide_cf_realize(IDEDevice *dev, Error **errp)
|
||||
{
|
||||
ide_dev_initfn(dev, IDE_CFATA, errp);
|
||||
}
|
||||
+#endif
|
||||
|
||||
#define DEFINE_IDE_DEV_PROPERTIES() \
|
||||
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
|
||||
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
|
||||
.class_init = ide_cd_class_init,
|
||||
};
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static Property ide_cf_properties[] = {
|
||||
DEFINE_IDE_DEV_PROPERTIES(),
|
||||
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
|
||||
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
|
||||
.instance_size = sizeof(IDEDrive),
|
||||
.class_init = ide_cf_class_init,
|
||||
};
|
||||
+#endif
|
||||
|
||||
static void ide_device_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
@@ -396,7 +402,10 @@ static void ide_register_types(void)
|
||||
type_register_static(&ide_bus_info);
|
||||
type_register_static(&ide_hd_info);
|
||||
type_register_static(&ide_cd_info);
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
type_register_static(&ide_cf_info);
|
||||
+#endif
|
||||
type_register_static(&ide_device_type_info);
|
||||
}
|
||||
|
||||
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
|
||||
index a40245ad44..9cc5a61ed7 100644
|
||||
--- a/hw/misc/meson.build
|
||||
+++ b/hw/misc/meson.build
|
||||
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
|
||||
|
||||
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
|
||||
|
||||
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
+# Disabled for Red Hat Enterprise Linux
|
||||
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
|
||||
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,37 @@
|
||||
From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:26:19 -0500
|
||||
Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on aarch64 to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
index aec1831199..b0191d3c69 100644
|
||||
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
@@ -39,3 +39,4 @@ CONFIG_PXB=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,37 @@
|
||||
From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:33:17 -0500
|
||||
Subject: [PATCH 067/101] Compile IOMMUFD on s390x
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on s390x to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
index 69a799adbd..24cf6dbd03 100644
|
||||
--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,37 @@
|
||||
From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:34:44 -0500
|
||||
Subject: [PATCH 068/101] Compile IOMMUFD on x86_64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on x86_64 to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
index ce5be73633..ba41108e0c 100644
|
||||
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
@@ -108,3 +108,4 @@ CONFIG_SGX=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,155 @@
|
||||
From 5c639f8ce65183ce8e44ee8e0230e9d627a440d7 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Wed, 21 Feb 2024 17:00:27 +0000
|
||||
Subject: [PATCH 05/20] Implement SMBIOS type 9 v2.6
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [3/18] ead230527d93938907a561cf5b985ee4f54d82b1
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
Author: Felix Wu <flwu@google.com>
|
||||
|
||||
Signed-off-by: Felix Wu <flwu@google.com>
|
||||
Signed-off-by: Nabih Estefan <nabihestefan@google.com>
|
||||
Message-Id: <20240221170027.1027325-3-nabihestefan@google.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
|
||||
(cherry picked from commit 04f143d828845d0fd52dd4a52664d81a4f5431f7)
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/smbios/smbios.c | 49 +++++++++++++++++++++++++++++++++---
|
||||
include/hw/firmware/smbios.h | 4 +++
|
||||
qemu-options.hx | 2 +-
|
||||
3 files changed, 51 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index 4f5637d445..074705fa4c 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -124,7 +124,7 @@ static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8);
|
||||
|
||||
/* type 9 instance for parsing */
|
||||
struct type9_instance {
|
||||
- const char *slot_designation;
|
||||
+ const char *slot_designation, *pcidev;
|
||||
uint8_t slot_type, slot_data_bus_width, current_usage, slot_length,
|
||||
slot_characteristics1, slot_characteristics2;
|
||||
uint16_t slot_id;
|
||||
@@ -427,6 +427,11 @@ static const QemuOptDesc qemu_smbios_type9_opts[] = {
|
||||
.type = QEMU_OPT_NUMBER,
|
||||
.help = "slot characteristics2, see the spec",
|
||||
},
|
||||
+ {
|
||||
+ .name = "pci_device",
|
||||
+ .type = QEMU_OPT_STRING,
|
||||
+ .help = "PCI device, if provided."
|
||||
+ }
|
||||
};
|
||||
|
||||
static const QemuOptDesc qemu_smbios_type11_opts[] = {
|
||||
@@ -851,7 +856,7 @@ static void smbios_build_type_8_table(void)
|
||||
}
|
||||
}
|
||||
|
||||
-static void smbios_build_type_9_table(void)
|
||||
+static void smbios_build_type_9_table(Error **errp)
|
||||
{
|
||||
unsigned instance = 0;
|
||||
struct type9_instance *t9;
|
||||
@@ -868,6 +873,43 @@ static void smbios_build_type_9_table(void)
|
||||
t->slot_characteristics1 = t9->slot_characteristics1;
|
||||
t->slot_characteristics2 = t9->slot_characteristics2;
|
||||
|
||||
+ if (t9->pcidev) {
|
||||
+ PCIDevice *pdev = NULL;
|
||||
+ int rc = pci_qdev_find_device(t9->pcidev, &pdev);
|
||||
+ if (rc != 0) {
|
||||
+ error_setg(errp,
|
||||
+ "No PCI device %s for SMBIOS type 9 entry %s",
|
||||
+ t9->pcidev, t9->slot_designation);
|
||||
+ return;
|
||||
+ }
|
||||
+ /*
|
||||
+ * We only handle the case where the device is attached to
|
||||
+ * the PCI root bus. The general case is more complex as
|
||||
+ * bridges are enumerated later and the table would need
|
||||
+ * to be updated at this moment.
|
||||
+ */
|
||||
+ if (!pci_bus_is_root(pci_get_bus(pdev))) {
|
||||
+ error_setg(errp,
|
||||
+ "Cannot create type 9 entry for PCI device %s: "
|
||||
+ "not attached to the root bus",
|
||||
+ t9->pcidev);
|
||||
+ return;
|
||||
+ }
|
||||
+ t->segment_group_number = cpu_to_le16(0);
|
||||
+ t->bus_number = pci_dev_bus_num(pdev);
|
||||
+ t->device_number = pdev->devfn;
|
||||
+ } else {
|
||||
+ /*
|
||||
+ * Per SMBIOS spec, For slots that are not of the PCI, AGP, PCI-X,
|
||||
+ * or PCI-Express type that do not have bus/device/function
|
||||
+ * information, 0FFh should be populated in the fields of Segment
|
||||
+ * Group Number, Bus Number, Device/Function Number.
|
||||
+ */
|
||||
+ t->segment_group_number = 0xff;
|
||||
+ t->bus_number = 0xff;
|
||||
+ t->device_number = 0xff;
|
||||
+ }
|
||||
+
|
||||
SMBIOS_BUILD_TABLE_POST;
|
||||
instance++;
|
||||
}
|
||||
@@ -1222,7 +1264,7 @@ void smbios_get_tables(MachineState *ms,
|
||||
}
|
||||
|
||||
smbios_build_type_8_table();
|
||||
- smbios_build_type_9_table();
|
||||
+ smbios_build_type_9_table(errp);
|
||||
smbios_build_type_11_table();
|
||||
|
||||
#define MAX_DIMM_SZ (16 * GiB)
|
||||
@@ -1568,6 +1610,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
|
||||
t->slot_id = qemu_opt_get_number(opts, "slot_id", 0);
|
||||
t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0);
|
||||
t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0);
|
||||
+ save_opt(&t->pcidev, opts, "pcidev");
|
||||
QTAILQ_INSERT_TAIL(&type9, t, next);
|
||||
return;
|
||||
}
|
||||
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
|
||||
index 6bbd5a4c20..f8dd07fe4c 100644
|
||||
--- a/include/hw/firmware/smbios.h
|
||||
+++ b/include/hw/firmware/smbios.h
|
||||
@@ -222,6 +222,10 @@ struct smbios_type_9 {
|
||||
uint16_t slot_id;
|
||||
uint8_t slot_characteristics1;
|
||||
uint8_t slot_characteristics2;
|
||||
+ /* SMBIOS spec v2.6+ */
|
||||
+ uint16_t segment_group_number;
|
||||
+ uint8_t bus_number;
|
||||
+ uint8_t device_number;
|
||||
} QEMU_PACKED;
|
||||
|
||||
/* SMBIOS type 11 - OEM strings */
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 94cacc2c63..93364e1765 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -2710,7 +2710,7 @@ SRST
|
||||
``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
|
||||
Specify SMBIOS type 4 fields
|
||||
|
||||
-``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]``
|
||||
+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d][,pci_device=str]``
|
||||
Specify SMBIOS type 9 fields
|
||||
|
||||
``-smbios type=11[,value=str][,path=filename]``
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,218 @@
|
||||
From 84fc607d678bd72397a41d706e91fa241fd97266 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Wed, 21 Feb 2024 17:00:26 +0000
|
||||
Subject: [PATCH 04/20] Implement base of SMBIOS type 9 descriptor.
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [2/18] 2678cc080bfbf3357fa2f94ceaf42fc61b690d32
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
|
||||
commit: 735eee07d1f963635d3c3bf9f5e4bf1bc000870e
|
||||
Author: Felix Wu <flwu@google.com>
|
||||
|
||||
Version 2.1+.
|
||||
|
||||
Signed-off-by: Felix Wu <flwu@google.com>
|
||||
Signed-off-by: Nabih Estefan <nabihestefan@google.com>
|
||||
Message-Id: <20240221170027.1027325-2-nabihestefan@google.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/smbios/smbios.c | 99 ++++++++++++++++++++++++++++++++++++
|
||||
include/hw/firmware/smbios.h | 13 +++++
|
||||
qemu-options.hx | 3 ++
|
||||
3 files changed, 115 insertions(+)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index 7bde23e59d..4f5637d445 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -122,6 +122,16 @@ struct type8_instance {
|
||||
};
|
||||
static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8);
|
||||
|
||||
+/* type 9 instance for parsing */
|
||||
+struct type9_instance {
|
||||
+ const char *slot_designation;
|
||||
+ uint8_t slot_type, slot_data_bus_width, current_usage, slot_length,
|
||||
+ slot_characteristics1, slot_characteristics2;
|
||||
+ uint16_t slot_id;
|
||||
+ QTAILQ_ENTRY(type9_instance) next;
|
||||
+};
|
||||
+static QTAILQ_HEAD(, type9_instance) type9 = QTAILQ_HEAD_INITIALIZER(type9);
|
||||
+
|
||||
static struct {
|
||||
size_t nvalues;
|
||||
char **values;
|
||||
@@ -371,6 +381,54 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = {
|
||||
},
|
||||
};
|
||||
|
||||
+static const QemuOptDesc qemu_smbios_type9_opts[] = {
|
||||
+ {
|
||||
+ .name = "type",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "SMBIOS element type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_designation",
|
||||
+ .type = QEMU_OPT_STRING,
|
||||
+ .help = "string number for reference designation",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_type",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "connector type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_data_bus_width",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "port type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "current_usage",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "current usage",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_length",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "system slot length",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_id",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "system slot id",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_characteristics1",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "slot characteristics1, see the spec",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_characteristics2",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "slot characteristics2, see the spec",
|
||||
+ },
|
||||
+};
|
||||
+
|
||||
static const QemuOptDesc qemu_smbios_type11_opts[] = {
|
||||
{
|
||||
.name = "value",
|
||||
@@ -594,6 +652,7 @@ bool smbios_skip_table(uint8_t type, bool required_table)
|
||||
#define T2_BASE 0x200
|
||||
#define T3_BASE 0x300
|
||||
#define T4_BASE 0x400
|
||||
+#define T9_BASE 0x900
|
||||
#define T11_BASE 0xe00
|
||||
|
||||
#define T16_BASE 0x1000
|
||||
@@ -792,6 +851,28 @@ static void smbios_build_type_8_table(void)
|
||||
}
|
||||
}
|
||||
|
||||
+static void smbios_build_type_9_table(void)
|
||||
+{
|
||||
+ unsigned instance = 0;
|
||||
+ struct type9_instance *t9;
|
||||
+
|
||||
+ QTAILQ_FOREACH(t9, &type9, next) {
|
||||
+ SMBIOS_BUILD_TABLE_PRE(9, T9_BASE + instance, true);
|
||||
+
|
||||
+ SMBIOS_TABLE_SET_STR(9, slot_designation, t9->slot_designation);
|
||||
+ t->slot_type = t9->slot_type;
|
||||
+ t->slot_data_bus_width = t9->slot_data_bus_width;
|
||||
+ t->current_usage = t9->current_usage;
|
||||
+ t->slot_length = t9->slot_length;
|
||||
+ t->slot_id = t9->slot_id;
|
||||
+ t->slot_characteristics1 = t9->slot_characteristics1;
|
||||
+ t->slot_characteristics2 = t9->slot_characteristics2;
|
||||
+
|
||||
+ SMBIOS_BUILD_TABLE_POST;
|
||||
+ instance++;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void smbios_build_type_11_table(void)
|
||||
{
|
||||
char count_str[128];
|
||||
@@ -1141,6 +1222,7 @@ void smbios_get_tables(MachineState *ms,
|
||||
}
|
||||
|
||||
smbios_build_type_8_table();
|
||||
+ smbios_build_type_9_table();
|
||||
smbios_build_type_11_table();
|
||||
|
||||
#define MAX_DIMM_SZ (16 * GiB)
|
||||
@@ -1472,6 +1554,23 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
|
||||
t8_i->port_type = qemu_opt_get_number(opts, "port_type", 0);
|
||||
QTAILQ_INSERT_TAIL(&type8, t8_i, next);
|
||||
return;
|
||||
+ case 9: {
|
||||
+ if (!qemu_opts_validate(opts, qemu_smbios_type9_opts, errp)) {
|
||||
+ return;
|
||||
+ }
|
||||
+ struct type9_instance *t;
|
||||
+ t = g_new0(struct type9_instance, 1);
|
||||
+ save_opt(&t->slot_designation, opts, "slot_designation");
|
||||
+ t->slot_type = qemu_opt_get_number(opts, "slot_type", 0);
|
||||
+ t->slot_data_bus_width = qemu_opt_get_number(opts, "slot_data_bus_width", 0);
|
||||
+ t->current_usage = qemu_opt_get_number(opts, "current_usage", 0);
|
||||
+ t->slot_length = qemu_opt_get_number(opts, "slot_length", 0);
|
||||
+ t->slot_id = qemu_opt_get_number(opts, "slot_id", 0);
|
||||
+ t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0);
|
||||
+ t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0);
|
||||
+ QTAILQ_INSERT_TAIL(&type9, t, next);
|
||||
+ return;
|
||||
+ }
|
||||
case 11:
|
||||
if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) {
|
||||
return;
|
||||
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
|
||||
index d24b3ccd32..6bbd5a4c20 100644
|
||||
--- a/include/hw/firmware/smbios.h
|
||||
+++ b/include/hw/firmware/smbios.h
|
||||
@@ -211,6 +211,19 @@ struct smbios_type_8 {
|
||||
uint8_t port_type;
|
||||
} QEMU_PACKED;
|
||||
|
||||
+/* SMBIOS type 9 - System Slots (v2.1+) */
|
||||
+struct smbios_type_9 {
|
||||
+ struct smbios_structure_header header;
|
||||
+ uint8_t slot_designation;
|
||||
+ uint8_t slot_type;
|
||||
+ uint8_t slot_data_bus_width;
|
||||
+ uint8_t current_usage;
|
||||
+ uint8_t slot_length;
|
||||
+ uint16_t slot_id;
|
||||
+ uint8_t slot_characteristics1;
|
||||
+ uint8_t slot_characteristics2;
|
||||
+} QEMU_PACKED;
|
||||
+
|
||||
/* SMBIOS type 11 - OEM strings */
|
||||
struct smbios_type_11 {
|
||||
struct smbios_structure_header header;
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 0814f43066..94cacc2c63 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -2710,6 +2710,9 @@ SRST
|
||||
``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
|
||||
Specify SMBIOS type 4 fields
|
||||
|
||||
+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics2=%d]``
|
||||
+ Specify SMBIOS type 9 fields
|
||||
+
|
||||
``-smbios type=11[,value=str][,path=filename]``
|
||||
Specify SMBIOS type 11 fields
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,60 @@
|
||||
From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 13:03:19 +0000
|
||||
Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending
|
||||
too much data to the backend"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu)
|
||||
|
||||
This commit results in unexpected termination of the TLS connection.
|
||||
When 'fd_can_read' returns 0, the code goes on to pass a zero length
|
||||
buffer to qio_channel_read. The TLS impl calls into gnutls_recv()
|
||||
with this zero length buffer, at which point GNUTLS returns an error
|
||||
GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code
|
||||
resulting in the connection being torn down by the chardev.
|
||||
|
||||
Simply skipping the qio_channel_read when the buffer length is zero
|
||||
is also not satisfactory, as it results in a high CPU burn busy loop
|
||||
massively slowing QEMU's functionality.
|
||||
|
||||
The proper solution is to avoid tcp_chr_read being called at all
|
||||
unless the frontend is able to accept more data. This will be done
|
||||
in a followup commit.
|
||||
|
||||
This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05
|
||||
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864)
|
||||
---
|
||||
chardev/char-socket.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index f48d341ebc..51d0943fce 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
||||
s->max_size <= 0) {
|
||||
return TRUE;
|
||||
}
|
||||
- len = tcp_chr_read_poll(opaque);
|
||||
- if (len > sizeof(buf)) {
|
||||
- len = sizeof(buf);
|
||||
+ len = sizeof(buf);
|
||||
+ if (len > s->max_size) {
|
||||
+ len = s->max_size;
|
||||
}
|
||||
size = tcp_chr_recv(chr, (void *)buf, len);
|
||||
if (size == 0 || (size == -1 && errno != EAGAIN)) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,216 @@
|
||||
From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 17:08:30 +0000
|
||||
Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu)
|
||||
|
||||
This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90,
|
||||
and adds comments to explain why child sources cannot be used.
|
||||
|
||||
When a GSource is added as a child of another GSource, if its
|
||||
'prepare' function indicates readiness, then the parent's
|
||||
'prepare' function will never be run. The io_watch_poll_prepare
|
||||
absolutely *must* be run on every iteration of the main loop,
|
||||
to ensure that the chardev backend doesn't feed data to the
|
||||
frontend that it is unable to consume.
|
||||
|
||||
At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made,
|
||||
all the child GSource impls were relying on poll'ing an FD,
|
||||
so their 'prepare' functions would never indicate readiness
|
||||
ahead of poll() being invoked. So the buggy behaviour was
|
||||
not noticed and lay dormant.
|
||||
|
||||
Relatively recently the QIOChannelTLS impl introduced a
|
||||
level 2 child GSource, which checks with GNUTLS whether it
|
||||
has cached any data that was decoded but not yet consumed:
|
||||
|
||||
commit ffda5db65aef42266a5053a4be34515106c4c7ee
|
||||
Author: Antoine Damhet <antoine.damhet@shadow.tech>
|
||||
Date: Tue Nov 15 15:23:29 2022 +0100
|
||||
|
||||
io/channel-tls: fix handling of bigger read buffers
|
||||
|
||||
Since the TLS backend can read more data from the underlying QIOChannel
|
||||
we introduce a minimal child GSource to notify if we still have more
|
||||
data available to be read.
|
||||
|
||||
Signed-off-by: Antoine Damhet <antoine.damhet@shadow.tech>
|
||||
Signed-off-by: Charles Frey <charles.frey@shadow.tech>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
|
||||
With this, it is now quite common for the 'prepare' function
|
||||
on a QIOChannelTLS GSource to indicate immediate readiness,
|
||||
bypassing the parent GSource 'prepare' function. IOW, the
|
||||
critical 'io_watch_poll_prepare' is being skipped on some
|
||||
iterations of the main loop. As a result chardev frontend
|
||||
asserts are now being triggered as they are fed data they
|
||||
are not ready to consume.
|
||||
|
||||
A reproducer is as follows:
|
||||
|
||||
* In terminal 1 run a GNUTLS *echo* server
|
||||
|
||||
$ gnutls-serv --echo \
|
||||
--x509cafile ca-cert.pem \
|
||||
--x509keyfile server-key.pem \
|
||||
--x509certfile server-cert.pem \
|
||||
-p 9000
|
||||
|
||||
* In terminal 2 run a QEMU guest
|
||||
|
||||
$ qemu-system-s390x \
|
||||
-nodefaults \
|
||||
-display none \
|
||||
-object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \
|
||||
-chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \
|
||||
-device sclpconsole,chardev=con0 \
|
||||
-hda Fedora-Cloud-Base-39-1.5.s390x.qcow2
|
||||
|
||||
After the previous patch revert, but before this patch revert,
|
||||
this scenario will crash:
|
||||
|
||||
qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
|
||||
`size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.
|
||||
|
||||
This assert indicates that 'tcp_chr_read' was called without
|
||||
'tcp_chr_read_poll' having first been checked for ability to
|
||||
receive more data
|
||||
|
||||
QEMU's use of a 'prepare' function to create/delete another
|
||||
GSource is rather a hack and not normally the kind of thing that
|
||||
is expected to be done by a GSource. There is no mechanism to
|
||||
force GLib to always run the 'prepare' function of a parent
|
||||
GSource. The best option is to simply not use the child source
|
||||
concept, and go back to the functional approach previously
|
||||
relied on.
|
||||
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de)
|
||||
---
|
||||
chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 51 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-io.c b/chardev/char-io.c
|
||||
index 4451128cba..dab77b112e 100644
|
||||
--- a/chardev/char-io.c
|
||||
+++ b/chardev/char-io.c
|
||||
@@ -33,6 +33,7 @@ typedef struct IOWatchPoll {
|
||||
IOCanReadHandler *fd_can_read;
|
||||
GSourceFunc fd_read;
|
||||
void *opaque;
|
||||
+ GMainContext *context;
|
||||
} IOWatchPoll;
|
||||
|
||||
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
||||
@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * We do not register the QIOChannel watch as a child GSource.
|
||||
+ * The 'prepare' function on the parent GSource will be
|
||||
+ * skipped if a child GSource's 'prepare' function indicates
|
||||
+ * readiness. We need this prepare function to be guaranteed
|
||||
+ * to run on *every* iteration of the main loop, because
|
||||
+ * it is critical to ensure we remove the QIOChannel watch
|
||||
+ * if 'fd_can_read' indicates the frontend cannot receive
|
||||
+ * more data.
|
||||
+ */
|
||||
if (now_active) {
|
||||
iwp->src = qio_channel_create_watch(
|
||||
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
|
||||
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
|
||||
- g_source_add_child_source(source, iwp->src);
|
||||
- g_source_unref(iwp->src);
|
||||
+ g_source_attach(iwp->src, iwp->context);
|
||||
} else {
|
||||
- g_source_remove_child_source(source, iwp->src);
|
||||
+ g_source_destroy(iwp->src);
|
||||
+ g_source_unref(iwp->src);
|
||||
iwp->src = NULL;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
+static gboolean io_watch_poll_check(GSource *source)
|
||||
+{
|
||||
+ return FALSE;
|
||||
+}
|
||||
+
|
||||
static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
|
||||
gpointer user_data)
|
||||
{
|
||||
- return G_SOURCE_CONTINUE;
|
||||
+ abort();
|
||||
+}
|
||||
+
|
||||
+static void io_watch_poll_finalize(GSource *source)
|
||||
+{
|
||||
+ /*
|
||||
+ * Due to a glib bug, removing the last reference to a source
|
||||
+ * inside a finalize callback causes recursive locking (and a
|
||||
+ * deadlock). This is not a problem inside other callbacks,
|
||||
+ * including dispatch callbacks, so we call io_remove_watch_poll
|
||||
+ * to remove this source. At this point, iwp->src must
|
||||
+ * be NULL, or we would leak it.
|
||||
+ */
|
||||
+ IOWatchPoll *iwp = io_watch_poll_from_source(source);
|
||||
+ assert(iwp->src == NULL);
|
||||
}
|
||||
|
||||
static GSourceFuncs io_watch_poll_funcs = {
|
||||
.prepare = io_watch_poll_prepare,
|
||||
+ .check = io_watch_poll_check,
|
||||
.dispatch = io_watch_poll_dispatch,
|
||||
+ .finalize = io_watch_poll_finalize,
|
||||
};
|
||||
|
||||
GSource *io_add_watch_poll(Chardev *chr,
|
||||
@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr,
|
||||
iwp->ioc = ioc;
|
||||
iwp->fd_read = (GSourceFunc) fd_read;
|
||||
iwp->src = NULL;
|
||||
+ iwp->context = context;
|
||||
|
||||
name = g_strdup_printf("chardev-iowatch-%s", chr->label);
|
||||
g_source_set_name((GSource *)iwp, name);
|
||||
@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr,
|
||||
return (GSource *)iwp;
|
||||
}
|
||||
|
||||
+static void io_remove_watch_poll(GSource *source)
|
||||
+{
|
||||
+ IOWatchPoll *iwp;
|
||||
+
|
||||
+ iwp = io_watch_poll_from_source(source);
|
||||
+ if (iwp->src) {
|
||||
+ g_source_destroy(iwp->src);
|
||||
+ g_source_unref(iwp->src);
|
||||
+ iwp->src = NULL;
|
||||
+ }
|
||||
+ g_source_destroy(&iwp->parent);
|
||||
+}
|
||||
+
|
||||
void remove_fd_in_watch(Chardev *chr)
|
||||
{
|
||||
if (chr->gsource) {
|
||||
- g_source_destroy(chr->gsource);
|
||||
+ io_remove_watch_poll(chr->gsource);
|
||||
chr->gsource = NULL;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,84 +0,0 @@
|
||||
From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Tue, 18 Apr 2023 11:04:49 +0200
|
||||
Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests
|
||||
RH-Bugzilla: 2087047
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Acked-by: Julia Suvorova <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam)
|
||||
|
||||
with Q35 using ACPI PCI hotplug by default, user's request to unplug
|
||||
device is ignored when it's issued before guest OS has been booted.
|
||||
And any additional attempt to request device hot-unplug afterwards
|
||||
results in following error:
|
||||
|
||||
"Device XYZ is already in the process of unplug"
|
||||
|
||||
arguably it can be considered as a regression introduced by [2],
|
||||
before which it was possible to issue unplug request multiple
|
||||
times.
|
||||
|
||||
Accept new unplug requests after timeout (1ms). This brings ACPI PCI
|
||||
hotplug on par with native PCIe unplug behavior [1] and allows user
|
||||
to repeat unplug requests at proper times.
|
||||
Set expire timeout to arbitrary 1msec so user won't be able to
|
||||
flood guest with SCI interrupts by calling device_del in tight loop.
|
||||
|
||||
PS:
|
||||
ACPI spec doesn't mandate what OSPM can do with GPEx.status
|
||||
bits set before it's booted => it's implementation-dependent.
|
||||
Status bits may be retained (I tested with one Windows version)
|
||||
or cleared (Linux since 2.6 kernel times) during guest's ACPI
|
||||
subsystem initialization.
|
||||
Clearing status bits (though not wrong per se) hides the unplug
|
||||
event from guest, and it's up to the user to repeat device_del later
|
||||
when guest is able to handle unplug requests.
|
||||
|
||||
1) 18416c62e3 ("pcie: expire pending delete")
|
||||
2)
|
||||
Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del")
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
CC: mst@redhat.com
|
||||
CC: anisinha@redhat.com
|
||||
CC: jusual@redhat.com
|
||||
CC: kraxel@redhat.com
|
||||
Message-Id: <20230418090449.2155757-1-imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Ani Sinha <anisinha@redhat.com>
|
||||
(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d)
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/acpi/pcihp.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
|
||||
index dcfb779a7a..cdd6f775a1 100644
|
||||
--- a/hw/acpi/pcihp.c
|
||||
+++ b/hw/acpi/pcihp.c
|
||||
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
* acpi_pcihp_eject_slot() when the operation is completed.
|
||||
*/
|
||||
pdev->qdev.pending_deleted_event = true;
|
||||
+ /* if unplug was requested before OSPM is initialized,
|
||||
+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively
|
||||
+ * hides unplug event. And then followup qmp_device_del() calls remain
|
||||
+ * blocked by above flag permanently.
|
||||
+ * Unblock qmp_device_del() by setting expire limit, so user can
|
||||
+ * repeat unplug request later when OSPM has been booted.
|
||||
+ */
|
||||
+ pdev->qdev.pending_deleted_expires_ms =
|
||||
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
|
||||
+
|
||||
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
|
||||
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,60 @@
|
||||
From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:01 -0500
|
||||
Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release()
|
||||
a no-op
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
aio_context_acquire()/aio_context_release() has been replaced by
|
||||
fine-grained locking to protect state shared by multiple threads. The
|
||||
AioContext lock still plays the role of balancing locking in
|
||||
AIO_WAIT_WHILE() and many functions in QEMU either require that the
|
||||
AioContext lock is held or not held for this reason. In other words, the
|
||||
AioContext lock is purely there for consistency with itself and serves
|
||||
no real purpose anymore.
|
||||
|
||||
Stop actually acquiring/releasing the lock in
|
||||
aio_context_acquire()/aio_context_release() so that subsequent patches
|
||||
can remove callers across the codebase incrementally.
|
||||
|
||||
I have performed "make check" and qemu-iotests stress tests across
|
||||
x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
|
||||
result of eliminating the lock.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
util/async.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 8f90ddc304..04ee83d220 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx)
|
||||
|
||||
void aio_context_acquire(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_lock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
void aio_context_release(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_unlock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,102 @@
|
||||
From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:07 -0500
|
||||
Subject: [PATCH 090/101] aio: remove
|
||||
aio_context_acquire()/aio_context_release() API
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Delete these functions because nothing calls these functions anymore.
|
||||
|
||||
I introduced these APIs in commit 98563fc3ec44 ("aio: add
|
||||
aio_context_acquire() and aio_context_release()") in 2014. It's with a
|
||||
sigh of relief that I delete these APIs almost 10 years later.
|
||||
|
||||
Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an
|
||||
understanding of where the code needed to go in order to remove the
|
||||
limitations of the original dataplane and the IOThread/AioContext
|
||||
approach that followed it.
|
||||
|
||||
Emanuele Giuseppe Esposito had the splendid determination to convert
|
||||
large parts of the codebase so that they no longer needed the AioContext
|
||||
lock. This was a painstaking process, both in the actual code changes
|
||||
required and the iterations of code review that Emanuele eked out of
|
||||
Kevin and me over many months.
|
||||
|
||||
Kevin Wolf tackled multitudes of graph locking conversions to protect
|
||||
in-flight I/O from run-time changes to the block graph as well as the
|
||||
clang Thread Safety Analysis annotations that allow the compiler to
|
||||
check whether the graph lock is being used correctly.
|
||||
|
||||
And me, well, I'm just here to add some pizzazz to the QEMU multi-queue
|
||||
block layer :). Thank you to everyone who helped with this effort,
|
||||
including Eric Blake, code reviewer extraordinaire, and others who I've
|
||||
forgotten to mention.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-11-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio.h | 17 -----------------
|
||||
util/async.c | 10 ----------
|
||||
2 files changed, 27 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index f08b358077..af05512a7d 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx);
|
||||
*/
|
||||
void aio_context_unref(AioContext *ctx);
|
||||
|
||||
-/* Take ownership of the AioContext. If the AioContext will be shared between
|
||||
- * threads, and a thread does not want to be interrupted, it will have to
|
||||
- * take ownership around calls to aio_poll(). Otherwise, aio_poll()
|
||||
- * automatically takes care of calling aio_context_acquire and
|
||||
- * aio_context_release.
|
||||
- *
|
||||
- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
|
||||
- * thread still has to call those to avoid being interrupted by the guest.
|
||||
- *
|
||||
- * Bottom halves, timers and callbacks can be created or removed without
|
||||
- * acquiring the AioContext.
|
||||
- */
|
||||
-void aio_context_acquire(AioContext *ctx);
|
||||
-
|
||||
-/* Relinquish ownership of the AioContext. */
|
||||
-void aio_context_release(AioContext *ctx);
|
||||
-
|
||||
/**
|
||||
* aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
|
||||
* run only once and as soon as possible.
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index dfd44ef612..460529057c 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx)
|
||||
g_source_unref(&ctx->source);
|
||||
}
|
||||
|
||||
-void aio_context_acquire(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
-void aio_context_release(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
|
||||
AioContext *qemu_get_current_aio_context(void)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,81 @@
|
||||
From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:06 -0500
|
||||
Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE()
|
||||
and AIO_WAIT_WHILE_UNLOCKED()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and
|
||||
AIO_WAIT_WHILE_UNLOCKED() are equivalent.
|
||||
|
||||
A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED().
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-10-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 16 ++++------------
|
||||
1 file changed, 4 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index 5449b6d742..157f105916 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -63,9 +63,6 @@ extern AioWait global_aio_wait;
|
||||
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
|
||||
* caller does not hold a lock) are involved in the polling condition.
|
||||
* @cond: wait while this conditional expression is true
|
||||
- * @unlock: whether to unlock and then lock again @ctx. This applies
|
||||
- * only when waiting for another AioContext from the main loop.
|
||||
- * Otherwise it's ignored.
|
||||
*
|
||||
* Wait while a condition is true. Use this to implement synchronous
|
||||
* operations that require event loop activity.
|
||||
@@ -78,7 +75,7 @@ extern AioWait global_aio_wait;
|
||||
* wait on conditions between two IOThreads since that could lead to deadlock,
|
||||
* go via the main loop instead.
|
||||
*/
|
||||
-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \
|
||||
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \
|
||||
bool waited_ = false; \
|
||||
AioWait *wait_ = &global_aio_wait; \
|
||||
AioContext *ctx_ = (ctx); \
|
||||
@@ -95,13 +92,7 @@ extern AioWait global_aio_wait;
|
||||
assert(qemu_get_current_aio_context() == \
|
||||
qemu_get_aio_context()); \
|
||||
while ((cond)) { \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_release(ctx_); \
|
||||
- } \
|
||||
aio_poll(qemu_get_aio_context(), true); \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_acquire(ctx_); \
|
||||
- } \
|
||||
waited_ = true; \
|
||||
} \
|
||||
} \
|
||||
@@ -109,10 +100,11 @@ extern AioWait global_aio_wait;
|
||||
waited_; })
|
||||
|
||||
#define AIO_WAIT_WHILE(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */
|
||||
#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
/**
|
||||
* aio_wait_kick:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 50795ee051a342c681a9b45671c552fbd6274db8
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:13 2023 -0400
|
||||
|
||||
apic: disable reentrancy detection for apic-msi
|
||||
|
||||
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
|
||||
as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/apic.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
|
||||
index 20b5a94073..ac3d47d231 100644
|
||||
--- a/hw/intc/apic.c
|
||||
+++ b/hw/intc/apic.c
|
||||
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
|
||||
APIC_SPACE_SIZE);
|
||||
|
||||
+ /*
|
||||
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
|
||||
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
|
||||
+ * safe.
|
||||
+ */
|
||||
+ s->io_memory.disable_reentrancy_guard = true;
|
||||
+
|
||||
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
|
||||
local_apics[s->id] = s;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,231 +0,0 @@
|
||||
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:07 2023 -0400
|
||||
|
||||
async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
|
||||
when creating new BHes. Then, the async API will toggle the guard
|
||||
before/after calling the BH call-back. This prevents bh->mmio reentrancy
|
||||
issues.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
|
||||
[thuth: Fix "line over 90 characters" checkpatch.pl error]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 7 +++++++
|
||||
include/block/aio.h | 18 ++++++++++++++++--
|
||||
include/qemu/main-loop.h | 7 +++++--
|
||||
tests/unit/ptimer-test-stubs.c | 3 ++-
|
||||
util/async.c | 18 +++++++++++++++++-
|
||||
util/main-loop.c | 6 ++++--
|
||||
util/trace-events | 1 +
|
||||
7 files changed, 52 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index 343120f2ef..a3e949f6b3 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
|
||||
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
|
||||
* LEGACY timer_new_ms() - create a timer
|
||||
* LEGACY qemu_bh_new() - create a BH
|
||||
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* LEGACY qemu_aio_wait() - run an event loop iteration
|
||||
|
||||
Since they implicitly work on the main loop they cannot be used in code that
|
||||
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
|
||||
* aio_set_event_notifier() - monitor an event notifier
|
||||
* aio_timer_new() - create a timer
|
||||
* aio_bh_new() - create a BH
|
||||
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* aio_poll() - run an event loop iteration
|
||||
|
||||
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
|
||||
+argument, which is used to check for and prevent re-entrancy problems. For
|
||||
+BHs associated with devices, the reentrancy-guard is contained in the
|
||||
+corresponding DeviceState and named "mem_reentrancy_guard".
|
||||
+
|
||||
The AioContext can be obtained from the IOThread using
|
||||
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
|
||||
Code that takes an AioContext argument works both in IOThreads or the main
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 543717f294..db6f23c619 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -23,6 +23,8 @@
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "block/graph-lock.h"
|
||||
+#include "hw/qdev-core.h"
|
||||
+
|
||||
|
||||
typedef struct BlockAIOCB BlockAIOCB;
|
||||
typedef void BlockCompletionFunc(void *opaque, int ret);
|
||||
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*
|
||||
* @name: A human-readable identifier for debugging purposes.
|
||||
+ * @reentrancy_guard: A guard set when entering a cb to prevent
|
||||
+ * device-reentrancy issues
|
||||
*/
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name);
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure
|
||||
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* string.
|
||||
*/
|
||||
#define aio_bh_new(ctx, cb, opaque) \
|
||||
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
|
||||
+
|
||||
+/**
|
||||
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
|
||||
+ * reentrancy_guard
|
||||
+ *
|
||||
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
|
||||
+ * string.
|
||||
+ */
|
||||
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
|
||||
index b3e54e00bc..68e70e61aa 100644
|
||||
--- a/include/qemu/main-loop.h
|
||||
+++ b/include/qemu/main-loop.h
|
||||
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
|
||||
|
||||
/* internal interfaces */
|
||||
|
||||
+#define qemu_bh_new_guarded(cb, opaque, guard) \
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
|
||||
#define qemu_bh_new(cb, opaque) \
|
||||
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
|
||||
enum {
|
||||
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
|
||||
index f2bfcede93..8c9407c560 100644
|
||||
--- a/tests/unit/ptimer-test-stubs.c
|
||||
+++ b/tests/unit/ptimer-test-stubs.c
|
||||
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
|
||||
return deadline;
|
||||
}
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh = g_new(QEMUBH, 1);
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 21016a1ac7..a9b528c370 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -65,6 +65,7 @@ struct QEMUBH {
|
||||
void *opaque;
|
||||
QSLIST_ENTRY(QEMUBH) next;
|
||||
unsigned flags;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
};
|
||||
|
||||
/* Called concurrently from any thread */
|
||||
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
|
||||
}
|
||||
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name)
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_new(QEMUBH, 1);
|
||||
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
.name = name,
|
||||
+ .reentrancy_guard = reentrancy_guard,
|
||||
};
|
||||
return bh;
|
||||
}
|
||||
|
||||
void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
+ bool last_engaged_in_io = false;
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
+ if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ trace_reentrant_aio(bh->ctx, bh->name);
|
||||
+ }
|
||||
+ bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
bh->cb(bh->opaque);
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
|
||||
diff --git a/util/main-loop.c b/util/main-loop.c
|
||||
index e180c85145..7022f02ef8 100644
|
||||
--- a/util/main-loop.c
|
||||
+++ b/util/main-loop.c
|
||||
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
|
||||
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
|
||||
+ reentrancy_guard);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/util/trace-events b/util/trace-events
|
||||
index 16f78d8fe5..3f7e766683 100644
|
||||
--- a/util/trace-events
|
||||
+++ b/util/trace-events
|
||||
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
|
||||
# async.c
|
||||
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
|
||||
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
|
||||
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Mon May 1 10:19:56 2023 -0400
|
||||
|
||||
async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
|
||||
Fix that by keeping a local copy of the re-entrancy guard pointer.
|
||||
|
||||
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
|
||||
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
util/async.c | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index a9b528c370..cd1a1815f9 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
bool last_engaged_in_io = false;
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
- if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ /* Make a copy of the guard-pointer as cb may free the bh */
|
||||
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
|
||||
+ if (reentrancy_guard) {
|
||||
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
|
||||
+ if (reentrancy_guard->engaged_in_io) {
|
||||
trace_reentrant_aio(bh->ctx, bh->name);
|
||||
}
|
||||
- bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ reentrancy_guard->engaged_in_io = true;
|
||||
}
|
||||
|
||||
bh->cb(bh->opaque);
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ if (reentrancy_guard) {
|
||||
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,476 @@
|
||||
From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:00 +0800
|
||||
Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm)
|
||||
|
||||
Introduce an iommufd object which allows the interaction
|
||||
with the host /dev/iommu device.
|
||||
|
||||
The /dev/iommu device may already have been pre-opened outside of qemu,
|
||||
in which case the fd can be passed directly along with the
|
||||
iommufd object:
|
||||
|
||||
This allows the iommufd object to be shared across several
|
||||
subsystems (VFIO, VDPA, ...). For example, libvirt would open
|
||||
the /dev/iommu once.
|
||||
|
||||
If no fd is passed along with the iommufd object, the /dev/iommu
|
||||
is opened by the qemu code.
|
||||
|
||||
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 8 ++
|
||||
backends/Kconfig | 4 +
|
||||
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
|
||||
backends/meson.build | 1 +
|
||||
backends/trace-events | 10 ++
|
||||
include/sysemu/iommufd.h | 38 ++++++
|
||||
qapi/qom.json | 19 +++
|
||||
qemu-options.hx | 12 ++
|
||||
8 files changed, 337 insertions(+)
|
||||
create mode 100644 backends/iommufd.c
|
||||
create mode 100644 include/sysemu/iommufd.h
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index 695e0bd34f..a5a446914a 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
|
||||
F: docs/system/s390x/vfio-ap.rst
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
+iommufd
|
||||
+M: Yi Liu <yi.l.liu@intel.com>
|
||||
+M: Eric Auger <eric.auger@redhat.com>
|
||||
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
+S: Supported
|
||||
+F: backends/iommufd.c
|
||||
+F: include/sysemu/iommufd.h
|
||||
+
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
S: Supported
|
||||
diff --git a/backends/Kconfig b/backends/Kconfig
|
||||
index f35abc1609..2cb23f62fa 100644
|
||||
--- a/backends/Kconfig
|
||||
+++ b/backends/Kconfig
|
||||
@@ -1 +1,5 @@
|
||||
source tpm/Kconfig
|
||||
+
|
||||
+config IOMMUFD
|
||||
+ bool
|
||||
+ depends on VFIO
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
new file mode 100644
|
||||
index 0000000000..ba58a0eb0d
|
||||
--- /dev/null
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -0,0 +1,245 @@
|
||||
+/*
|
||||
+ * iommufd container backend
|
||||
+ *
|
||||
+ * Copyright (C) 2023 Intel Corporation.
|
||||
+ * Copyright Red Hat, Inc. 2023
|
||||
+ *
|
||||
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
+ * Eric Auger <eric.auger@redhat.com>
|
||||
+ *
|
||||
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/iommufd.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "qapi/qmp/qerror.h"
|
||||
+#include "qemu/module.h"
|
||||
+#include "qom/object_interfaces.h"
|
||||
+#include "qemu/error-report.h"
|
||||
+#include "monitor/monitor.h"
|
||||
+#include "trace.h"
|
||||
+#include <sys/ioctl.h>
|
||||
+#include <linux/iommufd.h>
|
||||
+
|
||||
+static void iommufd_backend_init(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ be->fd = -1;
|
||||
+ be->users = 0;
|
||||
+ be->owned = true;
|
||||
+ qemu_mutex_init(&be->lock);
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_finalize(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ if (be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+ int fd = -1;
|
||||
+
|
||||
+ fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||
+ if (fd == -1) {
|
||||
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
+ return;
|
||||
+ }
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ be->fd = fd;
|
||||
+ be->owned = false;
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ trace_iommu_backend_set_fd(be->fd);
|
||||
+}
|
||||
+
|
||||
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
|
||||
+
|
||||
+ return !be->users;
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
|
||||
+{
|
||||
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||
+
|
||||
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
|
||||
+
|
||||
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
+{
|
||||
+ int fd, ret = 0;
|
||||
+
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (be->users == UINT32_MAX) {
|
||||
+ error_setg(errp, "too many connections");
|
||||
+ ret = -E2BIG;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ if (be->owned && !be->users) {
|
||||
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
|
||||
+ ret = fd;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->fd = fd;
|
||||
+ }
|
||||
+ be->users++;
|
||||
+out:
|
||||
+ trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
+ be->users, ret);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
+{
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (!be->users) {
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->users--;
|
||||
+ if (!be->users && be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+out:
|
||||
+ trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_alloc alloc_data = {
|
||||
+ .size = sizeof(alloc_data),
|
||||
+ .flags = 0,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
|
||||
+ if (ret) {
|
||||
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ *ioas_id = alloc_data.out_ioas_id;
|
||||
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_destroy des = {
|
||||
+ .size = sizeof(des),
|
||||
+ .id = id,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
|
||||
+ trace_iommufd_backend_free_id(fd, id, ret);
|
||||
+ if (ret) {
|
||||
+ error_report("Failed to free id: %u %m", id);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_map map = {
|
||||
+ .size = sizeof(map),
|
||||
+ .flags = IOMMU_IOAS_MAP_READABLE |
|
||||
+ IOMMU_IOAS_MAP_FIXED_IOVA,
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .__reserved = 0,
|
||||
+ .user_va = (uintptr_t)vaddr,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ if (!readonly) {
|
||||
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
|
||||
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
|
||||
+ vaddr, readonly, ret);
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+
|
||||
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
|
||||
+ if (errno == EFAULT) {
|
||||
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
|
||||
+ } else {
|
||||
+ error_report("IOMMU_IOAS_MAP failed: %m");
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_unmap unmap = {
|
||||
+ .size = sizeof(unmap),
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
|
||||
+ /*
|
||||
+ * IOMMUFD takes mapping as some kind of object, unmapping
|
||||
+ * nonexistent mapping is treated as deleting a nonexistent
|
||||
+ * object and return ENOENT. This is different from legacy
|
||||
+ * backend which allows it. vIOMMU may trigger a lot of
|
||||
+ * redundant unmapping, to avoid flooding the log, treat them
|
||||
+ * as success for IOMMUFD just like legacy backend.
|
||||
+ */
|
||||
+ if (ret && errno == ENOENT) {
|
||||
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
|
||||
+ ret = 0;
|
||||
+ } else {
|
||||
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
|
||||
+ }
|
||||
+
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static const TypeInfo iommufd_backend_info = {
|
||||
+ .name = TYPE_IOMMUFD_BACKEND,
|
||||
+ .parent = TYPE_OBJECT,
|
||||
+ .instance_size = sizeof(IOMMUFDBackend),
|
||||
+ .instance_init = iommufd_backend_init,
|
||||
+ .instance_finalize = iommufd_backend_finalize,
|
||||
+ .class_size = sizeof(IOMMUFDBackendClass),
|
||||
+ .class_init = iommufd_backend_class_init,
|
||||
+ .interfaces = (InterfaceInfo[]) {
|
||||
+ { TYPE_USER_CREATABLE },
|
||||
+ { }
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static void register_types(void)
|
||||
+{
|
||||
+ type_register_static(&iommufd_backend_info);
|
||||
+}
|
||||
+
|
||||
+type_init(register_types);
|
||||
diff --git a/backends/meson.build b/backends/meson.build
|
||||
index 914c7c4afb..9a5cea480d 100644
|
||||
--- a/backends/meson.build
|
||||
+++ b/backends/meson.build
|
||||
@@ -20,6 +20,7 @@ if have_vhost_user
|
||||
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
|
||||
endif
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
|
||||
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
|
||||
if have_vhost_user_crypto
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
|
||||
endif
|
||||
diff --git a/backends/trace-events b/backends/trace-events
|
||||
index 652eb76a57..d45c6e31a6 100644
|
||||
--- a/backends/trace-events
|
||||
+++ b/backends/trace-events
|
||||
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
|
||||
dbus_vmstate_post_load(int version_id) "version_id: %d"
|
||||
dbus_vmstate_loading(const char *id) "id: %s"
|
||||
dbus_vmstate_saving(const char *id) "id: %s"
|
||||
+
|
||||
+# iommufd.c
|
||||
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
|
||||
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
|
||||
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
|
||||
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
|
||||
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
|
||||
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
new file mode 100644
|
||||
index 0000000000..9c5524b0ed
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -0,0 +1,38 @@
|
||||
+#ifndef SYSEMU_IOMMUFD_H
|
||||
+#define SYSEMU_IOMMUFD_H
|
||||
+
|
||||
+#include "qom/object.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "exec/hwaddr.h"
|
||||
+#include "exec/cpu-common.h"
|
||||
+
|
||||
+#define TYPE_IOMMUFD_BACKEND "iommufd"
|
||||
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
|
||||
+
|
||||
+struct IOMMUFDBackendClass {
|
||||
+ ObjectClass parent_class;
|
||||
+};
|
||||
+
|
||||
+struct IOMMUFDBackend {
|
||||
+ Object parent;
|
||||
+
|
||||
+ /*< protected >*/
|
||||
+ int fd; /* /dev/iommu file descriptor */
|
||||
+ bool owned; /* is the /dev/iommu opened internally */
|
||||
+ QemuMutex lock;
|
||||
+ uint32_t users;
|
||||
+
|
||||
+ /*< public >*/
|
||||
+};
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp);
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly);
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size);
|
||||
+#endif
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index c53ef978ff..95516ba325 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -794,6 +794,23 @@
|
||||
{ 'struct': 'VfioUserServerProperties',
|
||||
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
|
||||
|
||||
+##
|
||||
+# @IOMMUFDProperties:
|
||||
+#
|
||||
+# Properties for iommufd objects.
|
||||
+#
|
||||
+# @fd: file descriptor name previously passed via 'getfd' command,
|
||||
+# which represents a pre-opened /dev/iommu. This allows the
|
||||
+# iommufd object to be shared across several subsystems
|
||||
+# (VFIO, VDPA, ...), and the file descriptor to be shared
|
||||
+# with other processes, e.g. DPDK. (default: QEMU opens
|
||||
+# /dev/iommu by itself)
|
||||
+#
|
||||
+# Since: 9.0
|
||||
+##
|
||||
+{ 'struct': 'IOMMUFDProperties',
|
||||
+ 'data': { '*fd': 'str' } }
|
||||
+
|
||||
##
|
||||
# @RngProperties:
|
||||
#
|
||||
@@ -934,6 +951,7 @@
|
||||
'input-barrier',
|
||||
{ 'name': 'input-linux',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd',
|
||||
'iothread',
|
||||
'main-loop',
|
||||
{ 'name': 'memory-backend-epc',
|
||||
@@ -1003,6 +1021,7 @@
|
||||
'input-barrier': 'InputBarrierProperties',
|
||||
'input-linux': { 'type': 'InputLinuxProperties',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd': 'IOMMUFDProperties',
|
||||
'iothread': 'IothreadProperties',
|
||||
'main-loop': 'MainLoopProperties',
|
||||
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 557118cb1f..0814f43066 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -5224,6 +5224,18 @@ SRST
|
||||
|
||||
The ``share`` boolean option is on by default with memfd.
|
||||
|
||||
+ ``-object iommufd,id=id[,fd=fd]``
|
||||
+ Creates an iommufd backend which allows control of DMA mapping
|
||||
+ through the ``/dev/iommu`` device.
|
||||
+
|
||||
+ The ``id`` parameter is a unique ID which frontends (such as
|
||||
+ vfio-pci or vdpa) will use to connect with the iommufd backend.
|
||||
+
|
||||
+ The ``fd`` parameter is an optional pre-opened file descriptor
|
||||
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
|
||||
+ across all subsystems, bringing the benefit of centralized
|
||||
+ reference counting.
|
||||
+
|
||||
``-object rng-builtin,id=id``
|
||||
Creates a random number generator backend which obtains entropy
|
||||
from QEMU builtin functions. The ``id`` parameter is a unique ID
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,47 @@
|
||||
From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Fri, 22 Dec 2023 08:55:23 +0100
|
||||
Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend
|
||||
users
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm)
|
||||
|
||||
QOM already has a ref count on objects and it will assert much
|
||||
earlier, when INT_MAX is reached.
|
||||
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 5 -----
|
||||
1 file changed, 5 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index ba58a0eb0d..393c0d9a37 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
int fd, ret = 0;
|
||||
|
||||
qemu_mutex_lock(&be->lock);
|
||||
- if (be->users == UINT32_MAX) {
|
||||
- error_setg(errp, "too many connections");
|
||||
- ret = -E2BIG;
|
||||
- goto out;
|
||||
- }
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,112 @@
|
||||
From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Thu, 21 Dec 2023 16:58:41 +0100
|
||||
Subject: [PATCH 065/101] backends/iommufd: Remove mutex
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Coverity reports a concurrent data access violation because be->users
|
||||
is being accessed in iommufd_backend_can_be_deleted() without holding
|
||||
the mutex.
|
||||
|
||||
However, these routines are called from the QEMU main thread when a
|
||||
device is created. In this case, the code paths should be protected by
|
||||
the BQL lock and it should be safe to drop the IOMMUFD backend mutex.
|
||||
Simply remove it.
|
||||
|
||||
Fixes: CID 1531550
|
||||
Fixes: CID 1531549
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 7 -------
|
||||
include/sysemu/iommufd.h | 2 --
|
||||
2 files changed, 9 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index 393c0d9a37..1ef683c7b0 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj)
|
||||
be->fd = -1;
|
||||
be->users = 0;
|
||||
be->owned = true;
|
||||
- qemu_mutex_init(&be->lock);
|
||||
}
|
||||
|
||||
static void iommufd_backend_finalize(Object *obj)
|
||||
@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
return;
|
||||
}
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
be->fd = fd;
|
||||
be->owned = false;
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
trace_iommu_backend_set_fd(be->fd);
|
||||
}
|
||||
|
||||
@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
{
|
||||
int fd, ret = 0;
|
||||
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
out:
|
||||
trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
be->users, ret);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
{
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (!be->users) {
|
||||
goto out;
|
||||
}
|
||||
@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
}
|
||||
out:
|
||||
trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
}
|
||||
|
||||
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
index 9c5524b0ed..9af27ebd6c 100644
|
||||
--- a/include/sysemu/iommufd.h
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -2,7 +2,6 @@
|
||||
#define SYSEMU_IOMMUFD_H
|
||||
|
||||
#include "qom/object.h"
|
||||
-#include "qemu/thread.h"
|
||||
#include "exec/hwaddr.h"
|
||||
#include "exec/cpu-common.h"
|
||||
|
||||
@@ -19,7 +18,6 @@ struct IOMMUFDBackend {
|
||||
/*< protected >*/
|
||||
int fd; /* /dev/iommu file descriptor */
|
||||
bool owned; /* is the /dev/iommu opened internally */
|
||||
- QemuMutex lock;
|
||||
uint32_t users;
|
||||
|
||||
/*< public >*/
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,57 +0,0 @@
|
||||
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
|
||||
iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:11 2023 -0400
|
||||
|
||||
bcm2835_property: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from bcm2835_property to
|
||||
bcm2835_mbox and back into bcm2835_property, mark iomem as
|
||||
reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/misc/bcm2835_property.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
|
||||
index 890ae7bae5..de056ea2df 100644
|
||||
--- a/hw/misc/bcm2835_property.c
|
||||
+++ b/hw/misc/bcm2835_property.c
|
||||
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
|
||||
|
||||
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
|
||||
TYPE_BCM2835_PROPERTY, 0x10);
|
||||
+
|
||||
+ /*
|
||||
+ * bcm2835_property_ops call into bcm2835_mbox, which in turn reads from
|
||||
+ * iomem. As such, mark iomem as re-entrancy safe.
|
||||
+ */
|
||||
+ s->iomem.disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
|
||||
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,354 +0,0 @@
|
||||
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:16 +0200
|
||||
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When processing vectored guest requests that are not aligned to the
|
||||
storage request alignment, we pad them by adding head and/or tail
|
||||
buffers for a read-modify-write cycle.
|
||||
|
||||
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
|
||||
with this padding, the vector can exceed that limit. As of
|
||||
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
|
||||
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
|
||||
limit, instead returning an error to the guest.
|
||||
|
||||
To the guest, this appears as a random I/O error. We should not return
|
||||
an I/O error to the guest when it issued a perfectly valid request.
|
||||
|
||||
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
|
||||
longer than IOV_MAX, which generally seems to work (because the guest
|
||||
assumes a smaller alignment than we really have, file-posix's
|
||||
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
|
||||
so emulate the request, so that the IOV_MAX does not matter). However,
|
||||
that does not seem exactly great.
|
||||
|
||||
I see two ways to fix this problem:
|
||||
1. We split such long requests into two requests.
|
||||
2. We join some elements of the vector into new buffers to make it
|
||||
shorter.
|
||||
|
||||
I am wary of (1), because it seems like it may have unintended side
|
||||
effects.
|
||||
|
||||
(2) on the other hand seems relatively simple to implement, with
|
||||
hopefully few side effects, so this patch does that.
|
||||
|
||||
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
|
||||
is effectively replaced by the new function bdrv_create_padded_qiov(),
|
||||
which not only wraps the request IOV with padding head/tail, but also
|
||||
ensures that the resulting vector will not have more than IOV_MAX
|
||||
elements. Putting that functionality into qemu_iovec_init_extended() is
|
||||
infeasible because it requires allocating a bounce buffer; doing so
|
||||
would require many more parameters (buffer alignment, how to initialize
|
||||
the buffer, and out parameters like the buffer, its length, and the
|
||||
original elements), which is not reasonable.
|
||||
|
||||
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
|
||||
functionality into bdrv_create_padded_qiov() by using public
|
||||
qemu_iovec_* functions, so that is what this patch does.
|
||||
|
||||
Because bdrv_pad_request() was the only "serious" user of
|
||||
qemu_iovec_init_extended(), the next patch will remove the latter
|
||||
function, so the functionality is not implemented twice.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 151 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e267a85ab..4e8e90208b 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1439,6 +1439,14 @@ out:
|
||||
* @merge_reads is true for small requests,
|
||||
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
* head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ *
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
|
||||
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
|
||||
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
|
||||
+ * I/O vector elements so for read requests, the data can be copied back after
|
||||
+ * the read is done.
|
||||
*/
|
||||
typedef struct BdrvRequestPadding {
|
||||
uint8_t *buf;
|
||||
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
|
||||
size_t head;
|
||||
size_t tail;
|
||||
bool merge_reads;
|
||||
+ bool write;
|
||||
QEMUIOVector local_qiov;
|
||||
+
|
||||
+ uint8_t *collapse_bounce_buf;
|
||||
+ size_t collapse_len;
|
||||
+ QEMUIOVector pre_collapse_qiov;
|
||||
} BdrvRequestPadding;
|
||||
|
||||
static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad)
|
||||
{
|
||||
int64_t align = bs->bl.request_alignment;
|
||||
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
}
|
||||
|
||||
+ pad->write = write;
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1547,8 +1563,23 @@ zero_mem:
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+/**
|
||||
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
|
||||
+ */
|
||||
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
|
||||
{
|
||||
+ if (pad->collapse_bounce_buf) {
|
||||
+ if (!pad->write) {
|
||||
+ /*
|
||||
+ * If padding required elements in the vector to be collapsed into a
|
||||
+ * bounce buffer, copy the bounce buffer content back
|
||||
+ */
|
||||
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_vfree(pad->collapse_bounce_buf);
|
||||
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
|
||||
+ }
|
||||
if (pad->buf) {
|
||||
qemu_vfree(pad->buf);
|
||||
qemu_iovec_destroy(&pad->local_qiov);
|
||||
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
memset(pad, 0, sizeof(*pad));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
|
||||
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
|
||||
+ *
|
||||
+ * To ensure this, when necessary, the first two or three elements of @iov are
|
||||
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
|
||||
+ * bounce buffer in pad->local_qiov.
|
||||
+ *
|
||||
+ * After performing a read request, the data from the bounce buffer must be
|
||||
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
|
||||
+ */
|
||||
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ struct iovec *iov, int niov,
|
||||
+ size_t iov_offset, size_t bytes)
|
||||
+{
|
||||
+ int padded_niov, surplus_count, collapse_count;
|
||||
+
|
||||
+ /* Assert this invariant */
|
||||
+ assert(niov <= IOV_MAX);
|
||||
+
|
||||
+ /*
|
||||
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
|
||||
+ * to the guest is not ideal, but there is little else we can do. At least
|
||||
+ * this will practically never happen on 64-bit systems.
|
||||
+ */
|
||||
+ if (SIZE_MAX - pad->head < bytes ||
|
||||
+ SIZE_MAX - pad->head - bytes < pad->tail)
|
||||
+ {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Length of the resulting IOV if we just concatenated everything */
|
||||
+ padded_niov = !!pad->head + niov + !!pad->tail;
|
||||
+
|
||||
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
|
||||
+ * Instead, merge the first two or three elements of @iov to reduce the
|
||||
+ * number of vector elements as necessary.
|
||||
+ */
|
||||
+ if (padded_niov > IOV_MAX) {
|
||||
+ /*
|
||||
+ * Only head and tail can have led to the number of entries exceeding
|
||||
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
|
||||
+ * to reduce the number of elements by `surplus_count`, so we merge that
|
||||
+ * many elements plus one into one element.
|
||||
+ */
|
||||
+ surplus_count = padded_niov - IOV_MAX;
|
||||
+ assert(surplus_count <= !!pad->head + !!pad->tail);
|
||||
+ collapse_count = surplus_count + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
|
||||
+ * advance `iov` (and associated variables) by those elements.
|
||||
+ */
|
||||
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
|
||||
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
|
||||
+ collapse_count, iov_offset, SIZE_MAX);
|
||||
+ iov += collapse_count;
|
||||
+ iov_offset = 0;
|
||||
+ niov -= collapse_count;
|
||||
+ bytes -= pad->pre_collapse_qiov.size;
|
||||
+
|
||||
+ /*
|
||||
+ * Construct the bounce buffer to match the length of the to-collapse
|
||||
+ * vector elements, and for write requests, initialize it with the data
|
||||
+ * from those elements. Then add it to `pad->local_qiov`.
|
||||
+ */
|
||||
+ pad->collapse_len = pad->pre_collapse_qiov.size;
|
||||
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
|
||||
+ if (pad->write) {
|
||||
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bdrv_pad_request
|
||||
*
|
||||
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
* read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
* needed.
|
||||
*
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
|
||||
* - on function start they represent original request
|
||||
* - on failure or when padding is not needed they are unchanged
|
||||
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad, bool *padded,
|
||||
BdrvRequestFlags *flags)
|
||||
{
|
||||
int ret;
|
||||
+ struct iovec *sliced_iov;
|
||||
+ int sliced_niov;
|
||||
+ size_t sliced_head, sliced_tail;
|
||||
|
||||
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
|
||||
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
*padded = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
- *qiov, *qiov_offset, *bytes,
|
||||
- pad->buf + pad->buf_len - pad->tail,
|
||||
- pad->tail);
|
||||
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
|
||||
+ &sliced_head, &sliced_tail,
|
||||
+ &sliced_niov);
|
||||
+
|
||||
+ /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ assert(*bytes <= SIZE_MAX);
|
||||
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
+ sliced_head, *bytes);
|
||||
if (ret < 0) {
|
||||
- bdrv_padding_destroy(pad);
|
||||
+ bdrv_padding_finalize(pad);
|
||||
return ret;
|
||||
}
|
||||
*bytes += pad->head + pad->tail;
|
||||
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- NULL, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
|
||||
+ &pad, NULL, &flags);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
bs->bl.request_alignment,
|
||||
qiov, qiov_offset, flags);
|
||||
tracked_request_end(&req);
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
fail:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
/* This flag doesn't make sense for padding or zero writes */
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
|
||||
if (padding) {
|
||||
assert(!(flags & BDRV_REQ_NO_WAIT));
|
||||
bdrv_make_request_serialising(req, align);
|
||||
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
}
|
||||
|
||||
out:
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- &padded, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
|
||||
+ &pad, &padded, &flags);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
qiov, qiov_offset, flags);
|
||||
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:34 +0200
|
||||
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
|
||||
qmp_block_resize()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This QMP handler runs in a coroutine, so it must use the corresponding
|
||||
no_co_wrappers instead.
|
||||
|
||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
blockdev.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index d7b5c18f0a..eb509cf964 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
if (!blk) {
|
||||
return;
|
||||
}
|
||||
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
|
||||
bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
bdrv_co_unlock(bs);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,73 +0,0 @@
|
||||
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 14 Jul 2023 10:59:38 +0200
|
||||
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
|
||||
which bdrv_check_qiov_request() does not guarantee.
|
||||
|
||||
bdrv_check_request32() however will guarantee this, and both of
|
||||
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
|
||||
bdrv_co_pwritev_part()) already run it before calling
|
||||
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
|
||||
bdrv_check_request32() without expecting error, too.
|
||||
|
||||
In effect, this patch will not change guest-visible behavior. It is a
|
||||
clean-up to tighten a condition to match what is guaranteed by our
|
||||
callers, and which exists purely to show clearly why the subsequent
|
||||
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
|
||||
|
||||
Note there is a difference between the interfaces of
|
||||
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
|
||||
an errp, the latter does not, so we can no longer just pass
|
||||
&error_abort. Instead, we need to check the returned value. While we
|
||||
do expect success (because the callers have already run this function),
|
||||
an assert(ret == 0) is not much simpler than just to return an error if
|
||||
it occurs, so let us handle errors by returning them up the stack now.
|
||||
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-id: 20230714085938.202730-1-hreitz@redhat.com
|
||||
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
|
||||
("block: Collapse padded I/O vecs exceeding IOV_MAX")
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/io.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 4e8e90208b..807c9fb720 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
int sliced_niov;
|
||||
size_t sliced_head, sliced_tail;
|
||||
|
||||
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
+ /* Should have been checked by the caller already */
|
||||
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
&sliced_head, &sliced_tail,
|
||||
&sliced_niov);
|
||||
|
||||
- /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ /* Guaranteed by bdrv_check_request32() */
|
||||
assert(*bytes <= SIZE_MAX);
|
||||
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
sliced_head, *bytes);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,104 @@
|
||||
From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 2 Feb 2024 15:47:54 +0100
|
||||
Subject: [PATCH 03/22] block-backend: Allow concurrent context changes
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 222: Allow concurrent BlockBackend context changes
|
||||
RH-Jira: RHEL-24593
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Since AioContext locks have been removed, a BlockBackend's AioContext
|
||||
may really change at any time (only exception is that it is often
|
||||
confined to a drained section, as noted in this patch). Therefore,
|
||||
blk_get_aio_context() cannot rely on its root node's context always
|
||||
matching that of the BlockBackend.
|
||||
|
||||
In practice, whether they match does not matter anymore anyway: Requests
|
||||
can be sent to BDSs from any context, so anyone who requests the BB's
|
||||
context should have no reason to require the root node to have the same
|
||||
context. Therefore, we can and should remove the assertion to that
|
||||
effect.
|
||||
|
||||
In addition, because the context can be set and queried from different
|
||||
threads concurrently, it has to be accessed with atomic operations.
|
||||
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-19381
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-ID: <20240202144755.671354-2-hreitz@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111)
|
||||
---
|
||||
block/block-backend.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index 209eb07528..9c4de79e6b 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -44,7 +44,7 @@ struct BlockBackend {
|
||||
char *name;
|
||||
int refcnt;
|
||||
BdrvChild *root;
|
||||
- AioContext *ctx;
|
||||
+ AioContext *ctx; /* access with atomic operations only */
|
||||
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
|
||||
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
|
||||
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
|
||||
@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
|
||||
}
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Return BB's current AioContext. Note that this context may change
|
||||
+ * concurrently at any time, with one exception: If the BB has a root node
|
||||
+ * attached, its context will only change through bdrv_try_change_aio_context(),
|
||||
+ * which creates a drained section. Therefore, incrementing such a BB's
|
||||
+ * in-flight counter will prevent its context from changing.
|
||||
+ */
|
||||
AioContext *blk_get_aio_context(BlockBackend *blk)
|
||||
{
|
||||
- BlockDriverState *bs;
|
||||
IO_CODE();
|
||||
|
||||
if (!blk) {
|
||||
return qemu_get_aio_context();
|
||||
}
|
||||
|
||||
- bs = blk_bs(blk);
|
||||
- if (bs) {
|
||||
- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
|
||||
- assert(ctx == blk->ctx);
|
||||
- }
|
||||
-
|
||||
- return blk->ctx;
|
||||
+ return qatomic_read(&blk->ctx);
|
||||
}
|
||||
|
||||
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (!bs) {
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
|
||||
AioContext *new_context = s->new_ctx;
|
||||
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
|
||||
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
if (tgm->throttle_state) {
|
||||
throttle_group_detach_aio_context(tgm);
|
||||
throttle_group_attach_aio_context(tgm, new_context);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,386 +0,0 @@
|
||||
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:33 +0200
|
||||
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
|
||||
context
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
These functions must not be called in coroutine context, because they
|
||||
need write access to the graph.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/crypto.c | 6 +++---
|
||||
block/parallels.c | 6 +++---
|
||||
block/qcow.c | 6 +++---
|
||||
block/qcow2.c | 14 +++++++-------
|
||||
block/qed.c | 6 +++---
|
||||
block/vdi.c | 6 +++---
|
||||
block/vhdx.c | 6 +++---
|
||||
block/vmdk.c | 18 +++++++++---------
|
||||
block/vpc.c | 6 +++---
|
||||
include/block/block-global-state.h | 3 ++-
|
||||
include/sysemu/block-backend-global-state.h | 5 ++++-
|
||||
12 files changed, 44 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index d79a52ca74..a48112f945 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index ca67289187..8fd3ad0054 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
|
||||
ret = 0;
|
||||
cleanup:
|
||||
qcrypto_block_free(crypto);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
fail:
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -730,7 +730,7 @@ fail:
|
||||
bdrv_co_delete_file_noerr(bs);
|
||||
}
|
||||
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_QCryptoBlockCreateOptions(create_opts);
|
||||
qobject_unref(cryptoopts);
|
||||
return ret;
|
||||
diff --git a/block/parallels.c b/block/parallels.c
|
||||
index 013684801a..b49c35929e 100644
|
||||
--- a/block/parallels.c
|
||||
+++ b/block/parallels.c
|
||||
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
|
||||
exit:
|
||||
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index 490e4f819e..a0c701f578 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
|
||||
g_free(tmp);
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(qcow_blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(qcow_blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
qcrypto_block_free(crypto);
|
||||
return ret;
|
||||
}
|
||||
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
fail:
|
||||
g_free(backing_fmt);
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 22084730f9..0b8beb8b47 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/*
|
||||
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
||||
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3949,8 +3949,8 @@ finish:
|
||||
}
|
||||
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 0705a7b4e2..aff2a2076e 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
|
||||
ret = 0; /* success */
|
||||
out:
|
||||
g_free(l1_table);
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vdi.c b/block/vdi.c
|
||||
index f2434d6153..08331d2dd7 100644
|
||||
--- a/block/vdi.c
|
||||
+++ b/block/vdi.c
|
||||
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs_file);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
g_free(bmap);
|
||||
return ret;
|
||||
}
|
||||
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
- bdrv_unref(bs_file);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/vhdx.c b/block/vhdx.c
|
||||
index 81420722a1..00777da91a 100644
|
||||
--- a/block/vhdx.c
|
||||
+++ b/block/vhdx.c
|
||||
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
|
||||
|
||||
ret = 0;
|
||||
delete_and_exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
g_free(creator);
|
||||
return ret;
|
||||
}
|
||||
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index f5f49018fe..01ca13c82b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2306,7 +2306,7 @@ exit:
|
||||
if (pbb) {
|
||||
*pbb = blk;
|
||||
} else {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
|
||||
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
|
||||
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
|
||||
blk_bs(backing)->drv->format_name);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
if (ret) {
|
||||
error_setg(errp, "Failed to read parent CID");
|
||||
goto exit;
|
||||
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
|
||||
blk_bs(extent_blk)->filename);
|
||||
created_size += cur_size;
|
||||
extent_idx++;
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
}
|
||||
|
||||
/* Check whether we got excess extents */
|
||||
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
|
||||
opaque, NULL);
|
||||
if (extent_blk) {
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
error_setg(errp, "List of extents contains unused extents");
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
|
||||
ret = 0;
|
||||
exit:
|
||||
if (blk) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
}
|
||||
g_free(desc);
|
||||
g_free(parent_desc_line);
|
||||
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
|
||||
errp)) {
|
||||
goto exit;
|
||||
}
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
exit:
|
||||
g_free(ext_filename);
|
||||
return blk;
|
||||
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
|
||||
return NULL;
|
||||
}
|
||||
blk_set_allow_write_beyond_eof(blk, true);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
|
||||
if (size != -1) {
|
||||
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
|
||||
if (ret) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index b89b0ff8e2..07ddda5b99 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
|
||||
}
|
||||
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 399200a9a3..cd4ea554bf 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
|
||||
bool quiet, Error **errp);
|
||||
|
||||
void bdrv_ref(BlockDriverState *bs);
|
||||
-void bdrv_unref(BlockDriverState *bs);
|
||||
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
|
||||
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
|
||||
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
|
||||
index 2b6d27db7c..fa83f9389c 100644
|
||||
--- a/include/sysemu/block-backend-global-state.h
|
||||
+++ b/include/sysemu/block-backend-global-state.h
|
||||
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
|
||||
|
||||
int blk_get_refcnt(BlockBackend *blk);
|
||||
void blk_ref(BlockBackend *blk);
|
||||
-void blk_unref(BlockBackend *blk);
|
||||
+
|
||||
+void no_coroutine_fn blk_unref(BlockBackend *blk);
|
||||
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
|
||||
+
|
||||
void blk_remove_all_bs(void);
|
||||
BlockBackend *blk_by_name(const char *name);
|
||||
BlockBackend *blk_next(BlockBackend *blk);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,74 +0,0 @@
|
||||
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Wed, 26 Jul 2023 09:48:07 +0200
|
||||
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() in blkio_virtio_blk_common_open() is used to open the
|
||||
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
|
||||
the future, eventually, a unix socket.
|
||||
|
||||
In all these cases we cannot open the path in read-only mode,
|
||||
when the `read-only` option of blockdev is on, because the exchange
|
||||
of IOCTL commands for example will fail.
|
||||
|
||||
In order to open the device read-only, we have to use the `read-only`
|
||||
property of the libblkio driver as we already do in blkio_file_open().
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 21 ++++++++++++---------
|
||||
1 file changed, 12 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 3ea9841bd8..5a82c6cb1a 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
* layer through the "/dev/fdset/N" special path.
|
||||
*/
|
||||
if (fd_supported) {
|
||||
- int open_flags;
|
||||
-
|
||||
- if (flags & BDRV_O_RDWR) {
|
||||
- open_flags = O_RDWR;
|
||||
- } else {
|
||||
- open_flags = O_RDONLY;
|
||||
- }
|
||||
-
|
||||
- fd = qemu_open(path, open_flags, errp);
|
||||
+ /*
|
||||
+ * `path` can contain the path of a character device
|
||||
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
|
||||
+ *
|
||||
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
|
||||
+ * is not set in the open flags, because the exchange of IOCTL commands
|
||||
+ * for example will fail.
|
||||
+ *
|
||||
+ * In order to open the device read-only, we are using the `read-only`
|
||||
+ * property of the libblkio driver in blkio_file_open().
|
||||
+ */
|
||||
+ fd = qemu_open(path, O_RDWR, errp);
|
||||
if (fd < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,54 +0,0 @@
|
||||
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 12:37:44 +0200
|
||||
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
|
||||
notifications enabled from the start, but from the next releases
|
||||
this is no longer the case, so we have to explicitly enable them.
|
||||
|
||||
In fact, the libblkio documentation says they could be disabled,
|
||||
so we should always enable them at the start if we want to be
|
||||
sure to get completion eventfd notifications:
|
||||
|
||||
By default, the driver might not generate completion events for
|
||||
requests so it is necessary to explicitly enable the completion
|
||||
file descriptor before use:
|
||||
|
||||
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
|
||||
|
||||
I discovered this while trying a development version of libblkio:
|
||||
the guest kernel hangs during boot, while probing the device.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index afcec359f2..3ea9841bd8 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
QLIST_INIT(&s->bounce_bufs);
|
||||
s->blkioq = blkio_get_queue(s->blkio, 0);
|
||||
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
|
||||
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
|
||||
|
||||
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
||||
return 0;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:19 +0200
|
||||
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
|
||||
setting fails
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() fails if called with a unix domain socket in this way:
|
||||
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
|
||||
|
||||
Since virtio-blk-vhost-user does not support fd passing, let's always fall back
|
||||
on using `path` if we fail the fd passing.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 93a8f8fc5c..eef80e9ce5 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
* In order to open the device read-only, we are using the `read-only`
|
||||
* property of the libblkio driver in blkio_file_open().
|
||||
*/
|
||||
- fd = qemu_open(path, O_RDWR, errp);
|
||||
+ fd = qemu_open(path, O_RDWR, NULL);
|
||||
if (fd < 0) {
|
||||
- return -EINVAL;
|
||||
+ fd_supported = false;
|
||||
+ } else {
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ fd_supported = false;
|
||||
+ qemu_close(fd);
|
||||
+ }
|
||||
}
|
||||
+ }
|
||||
|
||||
- ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
- blkio_get_error_msg());
|
||||
- qemu_close(fd);
|
||||
- return ret;
|
||||
- }
|
||||
- } else {
|
||||
+ if (!fd_supported) {
|
||||
ret = blkio_set_str(s->blkio, "path", path);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,205 +0,0 @@
|
||||
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 4 Jul 2023 14:34:36 +0200
|
||||
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
|
||||
RH-Bugzilla: 2213317
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
When QEMU is built with --enable-modules, the module_block.py script
|
||||
parses block/*.c to find block drivers that are built as modules. The
|
||||
script generates a table of block drivers called block_driver_modules[].
|
||||
This table is used for block driver module loading.
|
||||
|
||||
The blkio.c driver uses macros to define its BlockDriver structs. This
|
||||
was done to avoid code duplication but the module_block.py script is
|
||||
unable to parse the macro. The result is that libblkio-based block
|
||||
drivers can be built as modules but will not be found at runtime.
|
||||
|
||||
One fix is to make the module_block.py script or build system fancier so
|
||||
it can parse C macros (e.g. by parsing the preprocessed source code). I
|
||||
chose not to do this because it raises the complexity of the build,
|
||||
making future issues harder to debug.
|
||||
|
||||
Keep things simple: use the macro to avoid duplicating BlockDriver
|
||||
function pointers but define .format_name and .protocol_name manually
|
||||
for each BlockDriver. This way the module_block.py is able to parse the
|
||||
code.
|
||||
|
||||
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
|
||||
because module_block.py cannot parse them either.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230704123436.187761-1-stefanha@redhat.com
|
||||
Cc: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
|
||||
|
||||
Conflicts:
|
||||
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
|
||||
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
|
||||
1 file changed, 61 insertions(+), 57 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 6a6f20f923..afcec359f2 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -21,16 +21,6 @@
|
||||
|
||||
#include "block/block-io.h"
|
||||
|
||||
-/*
|
||||
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
|
||||
- * Using macros instead of typing out the string literals avoids typos.
|
||||
- */
|
||||
-#define DRIVER_IO_URING "io_uring"
|
||||
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
|
||||
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
|
||||
-
|
||||
/*
|
||||
* Allocated bounce buffers are kept in a list sorted by buffer address.
|
||||
*/
|
||||
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
|
||||
* - truncate
|
||||
*/
|
||||
|
||||
-#define BLKIO_DRIVER(name, ...) \
|
||||
- { \
|
||||
- .format_name = name, \
|
||||
- .protocol_name = name, \
|
||||
- .instance_size = sizeof(BDRVBlkioState), \
|
||||
- .bdrv_file_open = blkio_file_open, \
|
||||
- .bdrv_close = blkio_close, \
|
||||
- .bdrv_co_getlength = blkio_co_getlength, \
|
||||
- .bdrv_co_truncate = blkio_truncate, \
|
||||
- .bdrv_co_get_info = blkio_co_get_info, \
|
||||
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
- .bdrv_co_preadv = blkio_co_preadv, \
|
||||
- .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
- .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
- .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
- .bdrv_register_buf = blkio_register_buf, \
|
||||
- .bdrv_unregister_buf = blkio_unregister_buf, \
|
||||
- __VA_ARGS__ \
|
||||
- }
|
||||
-
|
||||
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_IO_URING,
|
||||
- .bdrv_needs_filename = true,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_NVME_IO_URING,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VFIO_PCI
|
||||
-);
|
||||
+/*
|
||||
+ * Do not include .format_name and .protocol_name because module_block.py
|
||||
+ * does not parse macros in the source code.
|
||||
+ */
|
||||
+#define BLKIO_DRIVER_COMMON \
|
||||
+ .instance_size = sizeof(BDRVBlkioState), \
|
||||
+ .bdrv_file_open = blkio_file_open, \
|
||||
+ .bdrv_close = blkio_close, \
|
||||
+ .bdrv_co_getlength = blkio_co_getlength, \
|
||||
+ .bdrv_co_truncate = blkio_truncate, \
|
||||
+ .bdrv_co_get_info = blkio_co_get_info, \
|
||||
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
+ .bdrv_co_preadv = blkio_co_preadv, \
|
||||
+ .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
+ .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
+ .bdrv_register_buf = blkio_register_buf, \
|
||||
+ .bdrv_unregister_buf = blkio_unregister_buf,
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_USER
|
||||
-);
|
||||
+/*
|
||||
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
|
||||
+ * consistency.
|
||||
+ */
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_VDPA
|
||||
-);
|
||||
+static BlockDriver bdrv_io_uring = {
|
||||
+ .format_name = "io_uring",
|
||||
+ .protocol_name = "io_uring",
|
||||
+ .bdrv_needs_filename = true,
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_nvme_io_uring = {
|
||||
+ .format_name = "nvme-io_uring",
|
||||
+ .protocol_name = "nvme-io_uring",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
|
||||
+ .format_name = "virtio-blk-vfio-pci",
|
||||
+ .protocol_name = "virtio-blk-vfio-pci",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_user = {
|
||||
+ .format_name = "virtio-blk-vhost-user",
|
||||
+ .protocol_name = "virtio-blk-vhost-user",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
|
||||
+ .format_name = "virtio-blk-vhost-vdpa",
|
||||
+ .protocol_name = "virtio-blk-vhost-vdpa",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
|
||||
static void bdrv_blkio_init(void)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,151 +0,0 @@
|
||||
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:17 +0200
|
||||
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
|
||||
functions
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
This is in preparation for the next patch, where for virtio-blk
|
||||
drivers we need to handle the failure of blkio_connect().
|
||||
|
||||
Let's also rename the *_open() functions to *_connect() to make
|
||||
the code reflect the changes applied.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 40 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 5a82c6cb1a..85d1eed5fb 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *filename = qdict_get_str(options, "filename");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
- QDict *options, int flags, Error **errp)
|
||||
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
qdict_del(options, "path");
|
||||
|
||||
return 0;
|
||||
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
- ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else {
|
||||
- g_assert_not_reached();
|
||||
- }
|
||||
- if (ret < 0) {
|
||||
- blkio_destroy(&s->blkio);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_RDWR)) {
|
||||
ret = blkio_set_bool(s->blkio, "read-only", true);
|
||||
if (ret < 0) {
|
||||
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
- ret = blkio_connect(s->blkio);
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else {
|
||||
+ g_assert_not_reached();
|
||||
+ }
|
||||
if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
- blkio_get_error_msg());
|
||||
blkio_destroy(&s->blkio);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,85 +0,0 @@
|
||||
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:18 +0200
|
||||
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
|
||||
`fd`
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
|
||||
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
|
||||
qemu_open() to support fd passing for virtio-blk") we are using
|
||||
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
|
||||
for all the virtio-blk-* drivers.
|
||||
|
||||
Unfortunately that property is also available for those drivers that do
|
||||
not support it, such as virtio-blk-vhost-user.
|
||||
|
||||
So, `blkio_get_int()` is not enough to check whether the driver supports
|
||||
the `fd` property or not. This is because the virtio-blk common libblkio
|
||||
driver only checks whether or not `fd` is set during `blkio_connect()`
|
||||
and fails with -EINVAL for those transports that do not support it
|
||||
(all except vhost-vdpa for now).
|
||||
|
||||
So let's handle the `blkio_connect()` failure, retrying it using `path`
|
||||
directly.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 29 +++++++++++++++++++++++++++++
|
||||
1 file changed, 29 insertions(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 85d1eed5fb..93a8f8fc5c 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
|
||||
ret = blkio_connect(s->blkio);
|
||||
+ /*
|
||||
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
|
||||
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
|
||||
+ * directly setting `path`.
|
||||
+ */
|
||||
+ if (fd_supported && ret == -EINVAL) {
|
||||
+ qemu_close(fd);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to clear the `fd` property we set previously by setting
|
||||
+ * it to -1.
|
||||
+ */
|
||||
+ ret = blkio_set_int(s->blkio, "fd", -1);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ }
|
||||
+
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
blkio_get_error_msg());
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,49 +0,0 @@
|
||||
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:20 +0200
|
||||
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
|
||||
support
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Setting the `fd` property fails with virtio-blk-* libblkio drivers
|
||||
that do not support fd passing since
|
||||
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
|
||||
|
||||
Getting the `fd` property, on the other hand, always succeeds for
|
||||
virtio-blk-* libblkio drivers even when they don't support fd passing.
|
||||
|
||||
This patch switches to setting the `fd` property because it is a
|
||||
better mechanism for probing fd passing support than getting the `fd`
|
||||
property.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index eef80e9ce5..8defbf744f 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
|
||||
fd_supported = true;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,108 +0,0 @@
|
||||
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 30 May 2023 09:19:40 +0200
|
||||
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
|
||||
virtio-blk
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
|
||||
RH-Bugzilla: 2180076
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd
|
||||
passing. Let's expose this to the user, so the management layer
|
||||
can pass the file descriptor of an already opened path.
|
||||
|
||||
If the libblkio virtio-blk driver supports fd passing, let's always
|
||||
use qemu_open() to open the `path`, so we can handle fd passing
|
||||
from the management layer through the "/dev/fdset/N" special path.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 44 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 0cdc99a729..6a6f20f923 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
- int ret;
|
||||
+ bool fd_supported = false;
|
||||
+ int fd, ret;
|
||||
|
||||
if (!path) {
|
||||
error_setg(errp, "missing 'path' option");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- ret = blkio_set_str(s->blkio, "path", path);
|
||||
- qdict_del(options, "path");
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
- blkio_get_error_msg());
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_NOCACHE)) {
|
||||
error_setg(errp, "cache.direct=off is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
+
|
||||
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ fd_supported = true;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
|
||||
+ * to open the `path`, so we can handle fd passing from the management
|
||||
+ * layer through the "/dev/fdset/N" special path.
|
||||
+ */
|
||||
+ if (fd_supported) {
|
||||
+ int open_flags;
|
||||
+
|
||||
+ if (flags & BDRV_O_RDWR) {
|
||||
+ open_flags = O_RDWR;
|
||||
+ } else {
|
||||
+ open_flags = O_RDONLY;
|
||||
+ }
|
||||
+
|
||||
+ fd = qemu_open(path, open_flags, errp);
|
||||
+ if (fd < 0) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ qemu_close(fd);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qdict_del(options, "path");
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,121 +0,0 @@
|
||||
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 1 May 2023 13:34:43 -0400
|
||||
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
|
||||
default
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
reader_count() is a performance bottleneck because the global
|
||||
aio_context_list_lock mutex causes thread contention. Put this debugging
|
||||
assertion behind a new ./configure --enable-debug-graph-lock option and
|
||||
disable it by default.
|
||||
|
||||
The --enable-debug-graph-lock option is also enabled by the more general
|
||||
--enable-debug option.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 3 +++
|
||||
configure | 1 +
|
||||
meson.build | 2 ++
|
||||
meson_options.txt | 2 ++
|
||||
scripts/meson-buildoptions.sh | 4 ++++
|
||||
5 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 454c31e691..259a7a0bde 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
diff --git a/configure b/configure
|
||||
index 800b5850f4..a62a3e6be9 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -806,6 +806,7 @@ for opt do
|
||||
--enable-debug)
|
||||
# Enable debugging options that aren't excessively noisy
|
||||
debug_tcg="yes"
|
||||
+ meson_option_parse --enable-debug-graph-lock ""
|
||||
meson_option_parse --enable-debug-mutex ""
|
||||
meson_option_add -Doptimization=0
|
||||
fortify_source="no"
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..d964e741e7 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
|
||||
have_coroutine_pool = false
|
||||
endif
|
||||
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
|
||||
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
|
||||
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
|
||||
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
|
||||
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
|
||||
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
|
||||
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
|
||||
summary_info += {'malloc trim support': has_malloc_trim}
|
||||
summary_info += {'membarrier': have_membarrier}
|
||||
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
|
||||
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
|
||||
summary_info += {'mutex debugging': get_option('debug_mutex')}
|
||||
summary_info += {'memory allocator': get_option('malloc')}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..bc857fe68b 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
|
||||
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
|
||||
option('coroutine_pool', type: 'boolean', value: true,
|
||||
description: 'coroutine freelist (better performance)')
|
||||
+option('debug_graph_lock', type: 'boolean', value: false,
|
||||
+ description: 'graph lock debugging support')
|
||||
option('debug_mutex', type: 'boolean', value: false,
|
||||
description: 'mutex debugging support')
|
||||
option('debug_stack_usage', type: 'boolean', value: false,
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..30e1f25259 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -21,6 +21,8 @@ meson_options_help() {
|
||||
printf "%s\n" ' QEMU'
|
||||
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
|
||||
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
|
||||
+ printf "%s\n" ' --enable-debug-graph-lock'
|
||||
+ printf "%s\n" ' graph lock debugging support'
|
||||
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
|
||||
printf "%s\n" ' --enable-debug-stack-usage'
|
||||
printf "%s\n" ' measure coroutine stack usage'
|
||||
@@ -249,6 +251,8 @@ _meson_option_parse() {
|
||||
--datadir=*) quote_sh "-Ddatadir=$2" ;;
|
||||
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
|
||||
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
|
||||
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
|
||||
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
|
||||
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
|
||||
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
|
||||
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,69 @@
|
||||
From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 12 Sep 2023 19:10:37 -0400
|
||||
Subject: [PATCH 095/101] block-coroutine-wrapper: use
|
||||
qemu_get_current_aio_context()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Use qemu_get_current_aio_context() in mixed wrappers and coroutine
|
||||
wrappers so that code runs in the caller's AioContext instead of moving
|
||||
to the BlockDriverState's AioContext. This change is necessary for the
|
||||
multi-queue block layer where any thread can call into the block layer.
|
||||
|
||||
Most wrappers are IO_CODE where it's safe to use the current AioContext
|
||||
nowadays. BlockDrivers and the core block layer use their own locks and
|
||||
no longer depend on the AioContext lock for thread-safety.
|
||||
|
||||
The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current
|
||||
AioContext is safe because this code is only called with the BQL held
|
||||
from the main loop thread.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230912231037.826804-6-stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
scripts/block-coroutine-wrapper.py | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
|
||||
index c9c09fcacd..dbbde99e39 100644
|
||||
--- a/scripts/block-coroutine-wrapper.py
|
||||
+++ b/scripts/block-coroutine-wrapper.py
|
||||
@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
|
||||
f"{self.name}")
|
||||
self.target_name = f'{subsystem}_{subname}'
|
||||
|
||||
- self.ctx = self.gen_ctx()
|
||||
-
|
||||
self.get_result = 's->ret = '
|
||||
self.ret = 'return s.ret;'
|
||||
self.co_ret = 'return '
|
||||
@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
|
||||
{func.co_ret}{name}({ func.gen_list('{name}') });
|
||||
}} else {{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
|
||||
{func.return_type} {func.name}({ func.gen_list('{decl}') })
|
||||
{{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,217 @@
|
||||
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 14 Sep 2023 10:00:58 -0400
|
||||
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
|
||||
the current thread
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The file-posix block driver currently only sets up Linux AIO and
|
||||
io_uring in the BDS's AioContext. In the multi-queue block layer we must
|
||||
be able to submit I/O requests in AioContexts that do not have Linux AIO
|
||||
and io_uring set up yet since any thread can call into the block driver.
|
||||
|
||||
Set up Linux AIO and io_uring for the current AioContext during request
|
||||
submission. We lose the ability to return an error from
|
||||
.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to
|
||||
resource limits). Instead the user only gets warnings and we fall back
|
||||
to aio=threads. This is still better than a fatal error after startup.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
|
||||
1 file changed, 51 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b862406c71..35684f7e21 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
/* Currently Linux does AIO only for files opened with O_DIRECT */
|
||||
- if (s->use_linux_aio) {
|
||||
- if (!(s->open_flags & O_DIRECT)) {
|
||||
- error_setg(errp, "aio=native was specified, but it requires "
|
||||
- "cache.direct=on, which was not specified.");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use native AIO: ");
|
||||
- goto fail;
|
||||
- }
|
||||
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
|
||||
+ error_setg(errp, "aio=native was specified, but it requires "
|
||||
+ "cache.direct=on, which was not specified.");
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
}
|
||||
#else
|
||||
if (s->use_linux_aio) {
|
||||
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||||
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use io_uring: ");
|
||||
- goto fail;
|
||||
- }
|
||||
- }
|
||||
-#else
|
||||
+#ifndef CONFIG_LINUX_IO_URING
|
||||
if (s->use_linux_io_uring) {
|
||||
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||||
"in this build.");
|
||||
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
return true;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LINUX_IO_URING
|
||||
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_io_uring) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_io_uring = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_LINUX_AIO
|
||||
+static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_aio) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_aio = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||||
type |= QEMU_AIO_MISALIGNED;
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- } else if (s->use_linux_io_uring) {
|
||||
+ } else if (raw_check_linux_io_uring(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
|
||||
goto out;
|
||||
#endif
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
- } else if (s->use_linux_aio) {
|
||||
+ } else if (raw_check_linux_aio(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = laio_co_submit(s->fd, offset, qiov, type,
|
||||
s->aio_max_batch);
|
||||
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||
};
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
+ if (raw_check_linux_io_uring(s)) {
|
||||
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
|
||||
}
|
||||
#endif
|
||||
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
|
||||
}
|
||||
|
||||
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
|
||||
- AioContext *new_context)
|
||||
-{
|
||||
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
|
||||
-#ifdef CONFIG_LINUX_AIO
|
||||
- if (s->use_linux_aio) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_aio(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use native AIO, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_aio = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_io_uring = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-}
|
||||
-
|
||||
static void raw_close(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,97 @@
|
||||
From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:04 -0500
|
||||
Subject: [PATCH 087/101] block: remove bdrv_co_lock()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops.
|
||||
Remove them.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-8-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 10 ----------
|
||||
blockdev.c | 5 -----
|
||||
include/block/block-global-state.h | 14 --------------
|
||||
3 files changed, 29 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 91ace5d2d5..434b7f4d72 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
|
||||
bdrv_dec_in_flight(bs);
|
||||
}
|
||||
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 5d8b3a23eb..3a5e7222ec 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_begin(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
|
||||
old_ctx = bdrv_co_enter(bs);
|
||||
blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
|
||||
bdrv_co_leave(bs, old_ctx);
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
-
|
||||
blk_co_unref(blk);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 0327f1c605..4ec0b217f0 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
|
||||
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
|
||||
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
|
||||
|
||||
-/**
|
||||
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
|
||||
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
|
||||
- * know we already own the lock of the current AioContext.
|
||||
- *
|
||||
- * May only be called in the main thread.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
|
||||
-
|
||||
-/**
|
||||
- * Unlocks the AioContext of @bs if it's not the current AioContext.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
|
||||
-
|
||||
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
|
||||
GHashTable *visited, Transaction *tran,
|
||||
Error **errp);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,411 @@
|
||||
From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:11 -0500
|
||||
Subject: [PATCH 094/101] block: remove outdated AioContext locking comments
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The AioContext lock no longer exists.
|
||||
|
||||
There is one noteworthy change:
|
||||
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
|
||||
I am not sure whether deadlocks are still possible. Maybe they have just
|
||||
moved to the fine-grained locks that have replaced the AioContext. Since
|
||||
I am not sure if the deadlocks are gone, I have kept the substance
|
||||
unchanged and just removed mention of the AioContext.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-15-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 73 ++++++----------------------
|
||||
block/block-backend.c | 8 ---
|
||||
block/export/vhost-user-blk-server.c | 4 --
|
||||
include/block/block-common.h | 3 --
|
||||
include/block/block-io.h | 9 ++--
|
||||
include/block/block_int-common.h | 2 -
|
||||
tests/qemu-iotests/202 | 2 +-
|
||||
tests/qemu-iotests/203 | 3 +-
|
||||
8 files changed, 22 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 434b7f4d72..a097772238 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1616,11 +1616,6 @@ out:
|
||||
g_free(gen_node_name);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * The caller must always hold @bs AioContext lock, because this function calls
|
||||
- * bdrv_refresh_total_sectors() which polls when called from non-coroutine
|
||||
- * context.
|
||||
- */
|
||||
static int no_coroutine_fn GRAPH_UNLOCKED
|
||||
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
|
||||
QDict *options, int open_flags, Error **errp)
|
||||
@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||
* Replaces the node that a BdrvChild points to without updating permissions.
|
||||
*
|
||||
* If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||
- * @child and the caller must hold the AioContext lock for @new_bs.
|
||||
+ * @child.
|
||||
*/
|
||||
static void GRAPH_WRLOCK
|
||||
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
|
||||
@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
|
||||
*
|
||||
* Returns new created child.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*/
|
||||
static BdrvChild * GRAPH_WRLOCK
|
||||
bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
/*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
|
||||
- * (unless @child_bs is already in @ctx).
|
||||
*/
|
||||
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
|
||||
const char *child_name,
|
||||
@@ -3226,9 +3216,6 @@ out:
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
|
||||
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
|
||||
*/
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
|
||||
*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3513,9 +3499,8 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
- * The caller must hold the AioContext lock for @backing_hd. Both @bs and
|
||||
- * @backing_hd can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @bs and @backing_hd can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* If a backing child is already present (i.e. we're detaching a node), that
|
||||
* child node must be drained.
|
||||
@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
||||
* itself, all options starting with "${bdref_key}." are considered part of the
|
||||
* BlockdevRef.
|
||||
*
|
||||
- * The caller must hold the main AioContext lock.
|
||||
- *
|
||||
* TODO Can this be unified with bdrv_open_image()?
|
||||
*/
|
||||
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||
@@ -3745,9 +3728,7 @@ done:
|
||||
*
|
||||
* The BlockdevRef will be removed from the options QDict.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
BdrvChild *bdrv_open_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
/*
|
||||
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
int bdrv_open_file_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3923,8 +3902,6 @@ out:
|
||||
* The reference parameter may be used to specify an existing block device which
|
||||
* should be opened. If specified, neither options nor a filename may be given,
|
||||
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
|
||||
- *
|
||||
- * The caller must always hold the main AioContext lock.
|
||||
*/
|
||||
static BlockDriverState * no_coroutine_fn
|
||||
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
@@ -4217,7 +4194,6 @@ close_and_fail:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-/* The caller must always hold the main AioContext lock. */
|
||||
BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||
QDict *options, int flags, Error **errp)
|
||||
{
|
||||
@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
|
||||
*
|
||||
* Return 0 on success, otherwise return < 0 and set @errp.
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
* @reopen_state->bs can move to a different AioContext in this function.
|
||||
- * Callers must make sure that their AioContext locking is still correct after
|
||||
- * this.
|
||||
*/
|
||||
static int GRAPH_UNLOCKED
|
||||
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
|
||||
@@ -4801,8 +4774,6 @@ out_rdlock:
|
||||
* It is the responsibility of the caller to then call the abort() or
|
||||
* commit() for any other BDS that have been left in a prepare() state
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
- *
|
||||
* After calling this function, the transaction @change_child_tran may only be
|
||||
* completed while holding a writer lock for the graph.
|
||||
*/
|
||||
@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
* child.
|
||||
*
|
||||
* This function does not create any image files.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for @bs_top.
|
||||
*/
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
Error **errp)
|
||||
@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs)
|
||||
* after the call (even on failure), so if the caller intends to reuse the
|
||||
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
||||
*
|
||||
- * The caller holds the AioContext lock for @bs. It must make sure that @bs
|
||||
- * stays in the same AioContext, i.e. @options must not refer to nodes in a
|
||||
- * different AioContext.
|
||||
+ * The caller must make sure that @bs stays in the same AioContext, i.e.
|
||||
+ * @options must not refer to nodes in a different AioContext.
|
||||
*/
|
||||
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
|
||||
int flags, Error **errp)
|
||||
@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = {
|
||||
*
|
||||
* Must be called from the main AioContext.
|
||||
*
|
||||
- * The caller must own the AioContext lock for the old AioContext of bs, but it
|
||||
- * must not own the AioContext lock for new_context (unless new_context is the
|
||||
- * same as the current context of bs).
|
||||
- *
|
||||
* @visited will accumulate all visited BdrvChild objects. The caller is
|
||||
* responsible for freeing the list afterwards.
|
||||
*/
|
||||
@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
*
|
||||
* If ignore_child is not NULL, that child (and its subgraph) will not
|
||||
* be touched.
|
||||
- *
|
||||
- * This function still requires the caller to take the bs current
|
||||
- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
|
||||
- * assumes the lock is always held if bs is in another AioContext.
|
||||
- * For the same reason, it temporarily also holds the new AioContext, since
|
||||
- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
|
||||
- * Therefore the new AioContext lock must not be taken by the caller.
|
||||
*/
|
||||
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
BdrvChild *ignore_child, Error **errp)
|
||||
@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
|
||||
/*
|
||||
* Linear phase: go through all callbacks collected in the transaction.
|
||||
- * Run all callbacks collected in the recursion to switch all nodes
|
||||
- * AioContext lock (transaction commit), or undo all changes done in the
|
||||
+ * Run all callbacks collected in the recursion to switch every node's
|
||||
+ * AioContext (transaction commit), or undo all changes done in the
|
||||
* recursion (transaction abort).
|
||||
*/
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index f412bed274..209eb07528 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
|
||||
* Both sets of permissions can be changed later using blk_set_perm().
|
||||
*
|
||||
* Return the new BlockBackend on success, null on failure.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
uint64_t shared_perm, Error **errp)
|
||||
@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
* Just as with bdrv_open(), after having called this function the reference to
|
||||
* @options belongs to the block layer (even on failure).
|
||||
*
|
||||
- * Called without holding an AioContext lock.
|
||||
- *
|
||||
* TODO: Remove @filename and @flags; it should be possible to specify a whole
|
||||
* BDS tree just by specifying the @options QDict (or @reference,
|
||||
* alternatively). At the time of adding this function, this is not possible,
|
||||
@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
|
||||
|
||||
/*
|
||||
* Disassociates the currently associated BlockDriverState from @blk.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for the BlockBackend.
|
||||
*/
|
||||
void blk_remove_bs(BlockBackend *blk)
|
||||
{
|
||||
@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk)
|
||||
|
||||
/*
|
||||
* Associates a new BlockDriverState with @blk.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
|
||||
{
|
||||
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
|
||||
index 16f48388d3..50c358e8cd 100644
|
||||
--- a/block/export/vhost-user-blk-server.c
|
||||
+++ b/block/export/vhost-user-blk-server.c
|
||||
@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque)
|
||||
vu_config_change_msg(&vexp->vu_server.vu_dev);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.ctx acquired */
|
||||
static void vu_blk_drained_begin(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque)
|
||||
vhost_user_server_detach_aio_context(&vexp->vu_server);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.blk AioContext acquired */
|
||||
static void vu_blk_drained_end(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque)
|
||||
* Ensures that bdrv_drained_begin() waits until in-flight requests complete
|
||||
* and the server->co_trip coroutine has terminated. It will be restarted in
|
||||
* vhost_user_server_attach_aio_context().
|
||||
- *
|
||||
- * Called with vexp->export.ctx acquired.
|
||||
*/
|
||||
static bool vu_blk_drained_poll(void *opaque)
|
||||
{
|
||||
diff --git a/include/block/block-common.h b/include/block/block-common.h
|
||||
index d7599564db..a846023a09 100644
|
||||
--- a/include/block/block-common.h
|
||||
+++ b/include/block/block-common.h
|
||||
@@ -70,9 +70,6 @@
|
||||
* automatically takes the graph rdlock when calling the wrapped function. In
|
||||
* the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
|
||||
* graph wrlock.
|
||||
- *
|
||||
- * If the first parameter of the function is a BlockDriverState, BdrvChild or
|
||||
- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
|
||||
*/
|
||||
#define no_co_wrapper
|
||||
#define no_co_wrapper_bdrv_rdlock
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 8eb39a858b..b49e0537dd 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||
* "I/O or GS" API functions. These functions can run without
|
||||
* the BQL, but only in one specific iothread/main loop.
|
||||
*
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
*
|
||||
* Therefore, these functions are not proper I/O, because they
|
||||
* can't run in *any* iothreads, but only in a specific one.
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 4e31d161c5..151279d481 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -1192,8 +1192,6 @@ struct BlockDriverState {
|
||||
/* The error object in use for blocking operations on backing_hd */
|
||||
Error *backing_blocker;
|
||||
|
||||
- /* Protected by AioContext lock */
|
||||
-
|
||||
/*
|
||||
* If we are reading a disk image, give its size in sectors.
|
||||
* Generally read-only; it is written to by load_snapshot and
|
||||
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
|
||||
index b784dcd791..13304242e5 100755
|
||||
--- a/tests/qemu-iotests/202
|
||||
+++ b/tests/qemu-iotests/202
|
||||
@@ -21,7 +21,7 @@
|
||||
# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
|
||||
# single IOThread completes successfully. This particular command triggered a
|
||||
# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
|
||||
-# against regressions.
|
||||
+# against regressions even though the AioContext lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
|
||||
index ab80fd0e44..1ba878522b 100755
|
||||
--- a/tests/qemu-iotests/203
|
||||
+++ b/tests/qemu-iotests/203
|
||||
@@ -21,7 +21,8 @@
|
||||
# Check that QMP 'migrate' with multiple drives on a single IOThread completes
|
||||
# successfully. This particular command triggered a hang in the source QEMU
|
||||
# process due to recursive AioContext locking in bdrv_invalidate_all() and
|
||||
-# BDRV_POLL_WHILE().
|
||||
+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext
|
||||
+# lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,105 @@
|
||||
From 95b2ffc5f01dc4309c2e747ed883d22cd1d26347 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Sat, 2 Mar 2024 17:00:23 +0100
|
||||
Subject: [PATCH 2/2] chardev/char-socket: Fix TLS io channels sending too much
|
||||
data to the backend
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 227: Fix TLS io channels sending too much data to the backend
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-Commit: [1/1] fce871914e0ce52e16a6edae0e007513f9fec1ae (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-24614
|
||||
|
||||
commit 462945cd22d2bcd233401ed3aa167d83a8e35b05
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu Feb 29 11:43:37 2024 +0100
|
||||
|
||||
chardev/char-socket: Fix TLS io channels sending too much data to the backend
|
||||
|
||||
Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers")
|
||||
changed the behavior of the TLS io channels to schedule a second reading
|
||||
attempt if there is still incoming data pending. This caused a regression
|
||||
with backends like the sclpconsole that check in their read function that
|
||||
the sender does not try to write more bytes to it than the device can
|
||||
currently handle.
|
||||
|
||||
The problem can be reproduced like this:
|
||||
|
||||
1) In one terminal, do this:
|
||||
|
||||
mkdir qemu-pki
|
||||
cd qemu-pki
|
||||
openssl genrsa 2048 > ca-key.pem
|
||||
openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem
|
||||
# enter some dummy value for the cert
|
||||
openssl genrsa 2048 > server-key.pem
|
||||
openssl req -new -x509 -nodes -days 365000 -key server-key.pem \
|
||||
-out server-cert.pem
|
||||
# enter some other dummy values for the cert
|
||||
|
||||
gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \
|
||||
--x509certfile server-cert.pem -p 8338
|
||||
|
||||
2) In another terminal, do this:
|
||||
|
||||
wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2
|
||||
|
||||
qemu-system-s390x -nographic -nodefaults \
|
||||
-hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \
|
||||
-object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \
|
||||
-chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \
|
||||
-device sclpconsole,chardev=tls_chardev,id=tls_serial
|
||||
|
||||
QEMU then aborts after a second or two with:
|
||||
|
||||
qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
|
||||
`size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.
|
||||
Aborted (core dumped)
|
||||
|
||||
It looks like the second read does not trigger the chr_can_read() function
|
||||
to be called before the second read, which should normally always be done
|
||||
before sending bytes to a character device to see how much it can handle,
|
||||
so the s->max_size in tcp_chr_read() still contains the old value from the
|
||||
previous read. Let's make sure that we use the up-to-date value by calling
|
||||
tcp_chr_read_poll() again here.
|
||||
|
||||
Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers")
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-24614
|
||||
Reviewed-by: "Daniel P. Berrangé" <berrange@redhat.com>
|
||||
Message-ID: <20240229104339.42574-1-thuth@redhat.com>
|
||||
Reviewed-by: Antoine Damhet <antoine.damhet@blade-group.com>
|
||||
Tested-by: Antoine Damhet <antoine.damhet@blade-group.com>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
chardev/char-socket.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 73947da188..034840593d 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
||||
s->max_size <= 0) {
|
||||
return TRUE;
|
||||
}
|
||||
- len = sizeof(buf);
|
||||
- if (len > s->max_size) {
|
||||
- len = s->max_size;
|
||||
+ len = tcp_chr_read_poll(opaque);
|
||||
+ if (len > sizeof(buf)) {
|
||||
+ len = sizeof(buf);
|
||||
}
|
||||
size = tcp_chr_recv(chr, (void *)buf, len);
|
||||
if (size == 0 || (size == -1 && errno != EAGAIN)) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,78 @@
|
||||
From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 18:06:59 +0000
|
||||
Subject: [PATCH 1/3] chardev: lower priority of the HUP GSource in socket
|
||||
chardev
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu)
|
||||
|
||||
The socket chardev often has 2 GSource objects registered against the
|
||||
same FD. One is registered all the time and is just intended to handle
|
||||
POLLHUP events, while the other gets registered & unregistered on the
|
||||
fly as the frontend is ready to receive more data or not.
|
||||
|
||||
It is very common for poll() to signal a POLLHUP event at the same time
|
||||
as there is pending incoming data from the disconnected client. It is
|
||||
therefore essential to process incoming data prior to processing HUP.
|
||||
The problem with having 2 GSource on the same FD is that there is no
|
||||
guaranteed ordering of execution between them, so the chardev code may
|
||||
process HUP first and thus discard data.
|
||||
|
||||
This failure scenario is non-deterministic but can be seen fairly
|
||||
reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and
|
||||
then running 'tests/unit/test-char', which will sometimes fail with
|
||||
missing data.
|
||||
|
||||
Ideally QEMU would only have 1 GSource, but that's a complex code
|
||||
refactoring job. The next best solution is to try to ensure ordering
|
||||
between the 2 GSource objects. This can be achieved by lowering the
|
||||
priority of the HUP GSource, so that it is never dispatched if the
|
||||
main GSource is also ready to dispatch. Counter-intuitively, lowering
|
||||
the priority of a GSource is done by raising its priority number.
|
||||
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6)
|
||||
---
|
||||
chardev/char-socket.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 034840593d..f48d341ebc 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s)
|
||||
|
||||
remove_hup_source(s);
|
||||
s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
|
||||
+ /*
|
||||
+ * poll() is liable to return POLLHUP even when there is
|
||||
+ * still incoming data available to read on the FD. If
|
||||
+ * we have the hup_source at the same priority as the
|
||||
+ * main io_add_watch_poll GSource, then we might end up
|
||||
+ * processing the POLLHUP event first, closing the FD,
|
||||
+ * and as a result silently discard data we should have
|
||||
+ * read.
|
||||
+ *
|
||||
+ * By setting the hup_source to G_PRIORITY_DEFAULT + 1,
|
||||
+ * we ensure that io_add_watch_poll GSource will always
|
||||
+ * be dispatched first, thus guaranteeing we will be
|
||||
+ * able to process all incoming data before closing the
|
||||
+ * FD
|
||||
+ */
|
||||
+ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1);
|
||||
g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
|
||||
chr, NULL);
|
||||
g_source_attach(s->hup_source, chr->gcontext);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit ef56ffbdd6b0605dc1e305611287b948c970e236
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:08 2023 -0400
|
||||
|
||||
checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
Advise authors to use the _guarded versions of the APIs, instead.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
scripts/checkpatch.pl | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
|
||||
index d768171dcf..eeaec436eb 100755
|
||||
--- a/scripts/checkpatch.pl
|
||||
+++ b/scripts/checkpatch.pl
|
||||
@@ -2865,6 +2865,14 @@ sub process {
|
||||
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
|
||||
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
|
||||
}
|
||||
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
|
||||
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
+# recommend aio_bh_new_guarded instead of aio_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
|
||||
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
# check for module_init(), use category-specific init macros explicitly please
|
||||
if ($line =~ /^module_init\s*\(/) {
|
||||
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,412 @@
|
||||
From e99c56752a1c4021a93c92b7be78856ebefaa1b3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:34:29 -0400
|
||||
Subject: [PATCH 1/2] coroutine: cap per-thread local pool size
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 234: coroutine: cap per-thread local pool size
|
||||
RH-Jira: RHEL-28947
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [1/2] 5971de1c1e238457925bfb9c4bfc932de857b28d (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
The coroutine pool implementation can hit the Linux vm.max_map_count
|
||||
limit, causing QEMU to abort with "failed to allocate memory for stack"
|
||||
or "failed to set up stack guard page" during coroutine creation.
|
||||
|
||||
This happens because per-thread pools can grow to tens of thousands of
|
||||
coroutines. Each coroutine causes 2 virtual memory areas to be created.
|
||||
Eventually vm.max_map_count is reached and memory-related syscalls fail.
|
||||
The per-thread pool sizes are non-uniform and depend on past coroutine
|
||||
usage in each thread, so it's possible for one thread to have a large
|
||||
pool while another thread's pool is empty.
|
||||
|
||||
Switch to a new coroutine pool implementation with a global pool that
|
||||
grows to a maximum number of coroutines and per-thread local pools that
|
||||
are capped at a small hardcoded number of coroutines.
|
||||
|
||||
This approach does not leave large numbers of coroutines pooled in a
|
||||
thread that may not use them again. In order to perform well it
|
||||
amortizes the cost of global pool accesses by working in batches of
|
||||
coroutines instead of individual coroutines.
|
||||
|
||||
The global pool is a list. Threads donate batches of coroutines to it when
|
||||
they have too many and take batches from it when they have too few:
|
||||
|
||||
.-----------------------------------.
|
||||
| Batch 1 | Batch 2 | Batch 3 | ... | global_pool
|
||||
`-----------------------------------'
|
||||
|
||||
Each thread has up to 2 batches of coroutines:
|
||||
|
||||
.-------------------.
|
||||
| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
|
||||
`-------------------'
|
||||
|
||||
The goal of this change is to reduce the excessive number of pooled
|
||||
coroutines that cause QEMU to abort when vm.max_map_count is reached
|
||||
without losing the performance of an adequately sized coroutine pool.
|
||||
|
||||
Here are virtio-blk disk I/O benchmark results:
|
||||
|
||||
RW BLKSIZE IODEPTH OLD NEW CHANGE
|
||||
randread 4k 1 113725 117451 +3.3%
|
||||
randread 4k 8 192968 198510 +2.9%
|
||||
randread 4k 16 207138 209429 +1.1%
|
||||
randread 4k 32 212399 215145 +1.3%
|
||||
randread 4k 64 218319 221277 +1.4%
|
||||
randread 128k 1 17587 17535 -0.3%
|
||||
randread 128k 8 17614 17616 +0.0%
|
||||
randread 128k 16 17608 17609 +0.0%
|
||||
randread 128k 32 17552 17553 +0.0%
|
||||
randread 128k 64 17484 17484 +0.0%
|
||||
|
||||
See files/{fio.sh,test.xml.j2} for the benchmark configuration:
|
||||
https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing
|
||||
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-28947
|
||||
Reported-by: Sanjay Rao <srao@redhat.com>
|
||||
Reported-by: Boaz Ben Shabat <bbenshab@redhat.com>
|
||||
Reported-by: Joe Mario <jmario@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240318183429.1039340-1-stefanha@redhat.com>
|
||||
(cherry picked from commit 86a637e48104ae74d8be53bed6441ce32be33433)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 223 insertions(+), 59 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index 5fd2dbaf8b..2790959eaf 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -18,39 +18,200 @@
|
||||
#include "qemu/atomic.h"
|
||||
#include "qemu/coroutine_int.h"
|
||||
#include "qemu/coroutine-tls.h"
|
||||
+#include "qemu/cutils.h"
|
||||
#include "block/aio.h"
|
||||
|
||||
-/**
|
||||
- * The minimal batch size is always 64, coroutines from the release_pool are
|
||||
- * reused as soon as there are 64 coroutines in it. The maximum pool size starts
|
||||
- * with 64 and is increased on demand so that coroutines are not deleted even if
|
||||
- * they are not immediately reused.
|
||||
- */
|
||||
enum {
|
||||
- POOL_MIN_BATCH_SIZE = 64,
|
||||
- POOL_INITIAL_MAX_SIZE = 64,
|
||||
+ COROUTINE_POOL_BATCH_MAX_SIZE = 128,
|
||||
};
|
||||
|
||||
-/** Free list to speed up creation */
|
||||
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
|
||||
-static unsigned int release_pool_size;
|
||||
+/*
|
||||
+ * Coroutine creation and deletion is expensive so a pool of unused coroutines
|
||||
+ * is kept as a cache. When the pool has coroutines available, they are
|
||||
+ * recycled instead of creating new ones from scratch. Coroutines are added to
|
||||
+ * the pool upon termination.
|
||||
+ *
|
||||
+ * The pool is global but each thread maintains a small local pool to avoid
|
||||
+ * global pool contention. Threads fetch and return batches of coroutines from
|
||||
+ * the global pool to maintain their local pool. The local pool holds up to two
|
||||
+ * batches whereas the maximum size of the global pool is controlled by the
|
||||
+ * qemu_coroutine_inc_pool_size() API.
|
||||
+ *
|
||||
+ * .-----------------------------------.
|
||||
+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
|
||||
+ * `-----------------------------------'
|
||||
+ *
|
||||
+ * .-------------------.
|
||||
+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
|
||||
+ * `-------------------'
|
||||
+ */
|
||||
+typedef struct CoroutinePoolBatch {
|
||||
+ /* Batches are kept in a list */
|
||||
+ QSLIST_ENTRY(CoroutinePoolBatch) next;
|
||||
+
|
||||
+ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
|
||||
+ QSLIST_HEAD(, Coroutine) list;
|
||||
+ unsigned int size;
|
||||
+} CoroutinePoolBatch;
|
||||
+
|
||||
+typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
|
||||
+
|
||||
+/* Host operating system limit on number of pooled coroutines */
|
||||
+static unsigned int global_pool_hard_max_size;
|
||||
+
|
||||
+static QemuMutex global_pool_lock; /* protects the following variables */
|
||||
+static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
|
||||
+static unsigned int global_pool_size;
|
||||
+static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
|
||||
+
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
|
||||
|
||||
-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
|
||||
+static CoroutinePoolBatch *coroutine_pool_batch_new(void)
|
||||
+{
|
||||
+ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
|
||||
+
|
||||
+ QSLIST_INIT(&batch->list);
|
||||
+ batch->size = 0;
|
||||
+ return batch;
|
||||
+}
|
||||
|
||||
-static void coroutine_pool_cleanup(Notifier *n, void *value)
|
||||
+static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
|
||||
{
|
||||
Coroutine *co;
|
||||
Coroutine *tmp;
|
||||
- CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
|
||||
- QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
|
||||
- QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
+ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
|
||||
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
|
||||
qemu_coroutine_delete(co);
|
||||
}
|
||||
+ g_free(batch);
|
||||
+}
|
||||
+
|
||||
+static void local_pool_cleanup(Notifier *n, void *value)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch;
|
||||
+ CoroutinePoolBatch *tmp;
|
||||
+
|
||||
+ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Ensure the atexit notifier is registered */
|
||||
+static void local_pool_cleanup_init_once(void)
|
||||
+{
|
||||
+ Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
|
||||
+ if (!notifier->notify) {
|
||||
+ notifier->notify = local_pool_cleanup;
|
||||
+ qemu_thread_atexit_add(notifier);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Helper to get the next unused coroutine from the local pool */
|
||||
+static Coroutine *coroutine_pool_get_local(void)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
|
||||
+ Coroutine *co;
|
||||
+
|
||||
+ if (unlikely(!batch)) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ co = QSLIST_FIRST(&batch->list);
|
||||
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
|
||||
+ batch->size--;
|
||||
+
|
||||
+ if (batch->size == 0) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+ }
|
||||
+ return co;
|
||||
+}
|
||||
+
|
||||
+/* Get the next batch from the global pool */
|
||||
+static void coroutine_pool_refill_local(void)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch;
|
||||
+
|
||||
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
|
||||
+ batch = QSLIST_FIRST(&global_pool);
|
||||
+
|
||||
+ if (batch) {
|
||||
+ QSLIST_REMOVE_HEAD(&global_pool, next);
|
||||
+ global_pool_size -= batch->size;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (batch) {
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ local_pool_cleanup_init_once();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Add a batch of coroutines to the global pool */
|
||||
+static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
|
||||
+{
|
||||
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
|
||||
+ unsigned int max = MIN(global_pool_max_size,
|
||||
+ global_pool_hard_max_size);
|
||||
+
|
||||
+ if (global_pool_size < max) {
|
||||
+ QSLIST_INSERT_HEAD(&global_pool, batch, next);
|
||||
+
|
||||
+ /* Overshooting the max pool size is allowed */
|
||||
+ global_pool_size += batch->size;
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* The global pool was full, so throw away this batch */
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+}
|
||||
+
|
||||
+/* Get the next unused coroutine from the pool or return NULL */
|
||||
+static Coroutine *coroutine_pool_get(void)
|
||||
+{
|
||||
+ Coroutine *co;
|
||||
+
|
||||
+ co = coroutine_pool_get_local();
|
||||
+ if (!co) {
|
||||
+ coroutine_pool_refill_local();
|
||||
+ co = coroutine_pool_get_local();
|
||||
+ }
|
||||
+ return co;
|
||||
+}
|
||||
+
|
||||
+static void coroutine_pool_put(Coroutine *co)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
|
||||
+
|
||||
+ if (unlikely(!batch)) {
|
||||
+ batch = coroutine_pool_batch_new();
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ local_pool_cleanup_init_once();
|
||||
+ }
|
||||
+
|
||||
+ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
|
||||
+ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
|
||||
+
|
||||
+ /* Is the local pool full? */
|
||||
+ if (next) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_put_global(batch);
|
||||
+ }
|
||||
+
|
||||
+ batch = coroutine_pool_batch_new();
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ }
|
||||
+
|
||||
+ QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
|
||||
+ batch->size++;
|
||||
}
|
||||
|
||||
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
@@ -58,31 +219,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
Coroutine *co = NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
|
||||
- CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
-
|
||||
- co = QSLIST_FIRST(alloc_pool);
|
||||
- if (!co) {
|
||||
- if (release_pool_size > POOL_MIN_BATCH_SIZE) {
|
||||
- /* Slow path; a good place to register the destructor, too. */
|
||||
- Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
|
||||
- if (!notifier->notify) {
|
||||
- notifier->notify = coroutine_pool_cleanup;
|
||||
- qemu_thread_atexit_add(notifier);
|
||||
- }
|
||||
-
|
||||
- /* This is not exact; there could be a little skew between
|
||||
- * release_pool_size and the actual size of release_pool. But
|
||||
- * it is just a heuristic, it does not need to be perfect.
|
||||
- */
|
||||
- set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
|
||||
- QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
|
||||
- co = QSLIST_FIRST(alloc_pool);
|
||||
- }
|
||||
- }
|
||||
- if (co) {
|
||||
- QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
- set_alloc_pool_size(get_alloc_pool_size() - 1);
|
||||
- }
|
||||
+ co = coroutine_pool_get();
|
||||
}
|
||||
|
||||
if (!co) {
|
||||
@@ -100,19 +237,10 @@ static void coroutine_delete(Coroutine *co)
|
||||
co->caller = NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
|
||||
- if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
|
||||
- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
|
||||
- qatomic_inc(&release_pool_size);
|
||||
- return;
|
||||
- }
|
||||
- if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
|
||||
- QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
|
||||
- set_alloc_pool_size(get_alloc_pool_size() + 1);
|
||||
- return;
|
||||
- }
|
||||
+ coroutine_pool_put(co);
|
||||
+ } else {
|
||||
+ qemu_coroutine_delete(co);
|
||||
}
|
||||
-
|
||||
- qemu_coroutine_delete(co);
|
||||
}
|
||||
|
||||
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
|
||||
@@ -223,10 +351,46 @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
|
||||
|
||||
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
|
||||
{
|
||||
- qatomic_add(&pool_max_size, additional_pool_size);
|
||||
+ QEMU_LOCK_GUARD(&global_pool_lock);
|
||||
+ global_pool_max_size += additional_pool_size;
|
||||
}
|
||||
|
||||
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
|
||||
{
|
||||
- qatomic_sub(&pool_max_size, removing_pool_size);
|
||||
+ QEMU_LOCK_GUARD(&global_pool_lock);
|
||||
+ global_pool_max_size -= removing_pool_size;
|
||||
+}
|
||||
+
|
||||
+static unsigned int get_global_pool_hard_max_size(void)
|
||||
+{
|
||||
+#ifdef __linux__
|
||||
+ g_autofree char *contents = NULL;
|
||||
+ int max_map_count;
|
||||
+
|
||||
+ /*
|
||||
+ * Linux processes can have up to max_map_count virtual memory areas
|
||||
+ * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
|
||||
+ * must limit the coroutine pool to a safe size to avoid running out of
|
||||
+ * VMAs.
|
||||
+ */
|
||||
+ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
|
||||
+ NULL) &&
|
||||
+ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
|
||||
+ /*
|
||||
+ * This is a conservative upper bound that avoids exceeding
|
||||
+ * max_map_count. Leave half for non-coroutine users like library
|
||||
+ * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so
|
||||
+ * halve the amount again.
|
||||
+ */
|
||||
+ return max_map_count / 4;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ return UINT_MAX;
|
||||
+}
|
||||
+
|
||||
+static void __attribute__((constructor)) qemu_coroutine_init(void)
|
||||
+{
|
||||
+ qemu_mutex_init(&global_pool_lock);
|
||||
+ global_pool_hard_max_size = get_global_pool_hard_max_size();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,61 @@
|
||||
From 0aa65dc3acba481f7064df936ab49e3bceb1d5bd Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 14:12:32 -0400
|
||||
Subject: [PATCH 2/2] coroutine: reserve 5,000 mappings
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 234: coroutine: cap per-thread local pool size
|
||||
RH-Jira: RHEL-28947
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [2/2] 78560c2b947471111cc16c313d6f38db42860a1c (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Daniel P. Berrangé <berrange@redhat.com> pointed out that the coroutine
|
||||
pool size heuristic is very conservative. Instead of halving
|
||||
max_map_count, he suggested reserving 5,000 mappings for non-coroutine
|
||||
users based on observations of guests he has access to.
|
||||
|
||||
Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size")
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20240320181232.1464819-1-stefanha@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9352f80cd926fe2dde7c89b93ee33bb0356ff40e)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 15 ++++++++++-----
|
||||
1 file changed, 10 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index 2790959eaf..eb4eebefdf 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -377,12 +377,17 @@ static unsigned int get_global_pool_hard_max_size(void)
|
||||
NULL) &&
|
||||
qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
|
||||
/*
|
||||
- * This is a conservative upper bound that avoids exceeding
|
||||
- * max_map_count. Leave half for non-coroutine users like library
|
||||
- * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so
|
||||
- * halve the amount again.
|
||||
+ * This is an upper bound that avoids exceeding max_map_count. Leave a
|
||||
+ * fixed amount for non-coroutine users like library dependencies,
|
||||
+ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
|
||||
+ * remaining amount.
|
||||
*/
|
||||
- return max_map_count / 4;
|
||||
+ if (max_map_count > 5000) {
|
||||
+ return (max_map_count - 5000) / 2;
|
||||
+ } else {
|
||||
+ /* Disable the global pool but threads still have local pools */
|
||||
+ return 0;
|
||||
+ }
|
||||
}
|
||||
#endif
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,75 @@
|
||||
From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 4 Dec 2023 11:42:59 -0500
|
||||
Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
|
||||
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
|
||||
to avoid a race with scsi_device_purge_requests() running in the main
|
||||
loop thread.
|
||||
|
||||
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
|
||||
while I/O is running in the IOThread AioContext. Therefore it is no
|
||||
longer necessary to take this lock to protect DMAAIOCB fields. The
|
||||
->cb() function also does not require the lock because blk_aio_*() and
|
||||
friends do not need the AioContext lock.
|
||||
|
||||
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
|
||||
rely on it taking the AioContext lock, so this change is safe.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231204164259.1515217-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
system/dma-helpers.c | 7 ++-----
|
||||
1 file changed, 2 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
|
||||
index 36211acc7e..528117f256 100644
|
||||
--- a/system/dma-helpers.c
|
||||
+++ b/system/dma-helpers.c
|
||||
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
- aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
trace_dma_map_wait(dbs);
|
||||
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
assert(dbs->acb);
|
||||
-out:
|
||||
- aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,228 @@
|
||||
From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001
|
||||
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:26 +0800
|
||||
Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 1 +
|
||||
docs/devel/index-internals.rst | 1 +
|
||||
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
|
||||
3 files changed, 168 insertions(+)
|
||||
create mode 100644 docs/devel/vfio-iommufd.rst
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index ca70bb4e64..0ddb20a35f 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
|
||||
F: include/sysemu/iommufd.h
|
||||
F: include/qemu/chardev_open.h
|
||||
F: util/chardev_open.c
|
||||
+F: docs/devel/vfio-iommufd.rst
|
||||
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
|
||||
index 6f81df92bc..3def4a138b 100644
|
||||
--- a/docs/devel/index-internals.rst
|
||||
+++ b/docs/devel/index-internals.rst
|
||||
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
|
||||
s390-dasd-ipl
|
||||
tracing
|
||||
vfio-migration
|
||||
+ vfio-iommufd
|
||||
writing-monitor-commands
|
||||
virtio-backends
|
||||
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
|
||||
new file mode 100644
|
||||
index 0000000000..3d1c11f175
|
||||
--- /dev/null
|
||||
+++ b/docs/devel/vfio-iommufd.rst
|
||||
@@ -0,0 +1,166 @@
|
||||
+===============================
|
||||
+IOMMUFD BACKEND usage with VFIO
|
||||
+===============================
|
||||
+
|
||||
+(Same meaning for backend/container/BE)
|
||||
+
|
||||
+With the introduction of iommufd, the Linux kernel provides a generic
|
||||
+interface for user space drivers to propagate their DMA mappings to kernel
|
||||
+for assigned devices. While the legacy kernel interface is group-centric,
|
||||
+the new iommufd interface is device-centric, relying on device fd and iommufd.
|
||||
+
|
||||
+To support both interfaces in the QEMU VFIO device, introduce a base container
|
||||
+to abstract the common part of the VFIO legacy and iommufd containers, so that the
|
||||
+generic VFIO code can use either container.
|
||||
+
|
||||
+The base container implements generic functions such as memory_listener and
|
||||
+address space management whereas the derived container implements callbacks
|
||||
+specific to either legacy or iommufd. Each container has its own way to setup
|
||||
+secure context and dma management interface. The below diagram shows how it
|
||||
+looks with both containers.
|
||||
+
|
||||
+::
|
||||
+
|
||||
+ VFIO AddressSpace/Memory
|
||||
+ +-------+ +----------+ +-----+ +-----+
|
||||
+ | pci | | platform | | ap | | ccw |
|
||||
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
|
||||
+ | | | | | AddressSpace |
|
||||
+ | | | | +------------+---------+
|
||||
+ +---V-----------V-----------V--------V----+ /
|
||||
+ | VFIOAddressSpace | <------------+
|
||||
+ | | | MemoryListener
|
||||
+ | VFIOContainerBase list |
|
||||
+ +-------+----------------------------+----+
|
||||
+ | |
|
||||
+ | |
|
||||
+ +-------V------+ +--------V----------+
|
||||
+ | iommufd | | vfio legacy |
|
||||
+ | container | | container |
|
||||
+ +-------+------+ +--------+----------+
|
||||
+ | |
|
||||
+ | /dev/iommu | /dev/vfio/vfio
|
||||
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
|
||||
+ Userspace | |
|
||||
+ ============+============================+===========================
|
||||
+ Kernel | device fd |
|
||||
+ +---------------+ | group/container fd
|
||||
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
|
||||
+ | ATTACH_IOAS) | | device fd
|
||||
+ | | |
|
||||
+ | +-------V------------V-----------------+
|
||||
+ iommufd | | vfio |
|
||||
+ (map/unmap | +---------+--------------------+-------+
|
||||
+ ioas_copy) | | | map/unmap
|
||||
+ | | |
|
||||
+ +------V------+ +-----V------+ +------V--------+
|
||||
+ | iommfd core | | device | | vfio iommu |
|
||||
+ +-------------+ +------------+ +---------------+
|
||||
+
|
||||
+* Secure Context setup
|
||||
+
|
||||
+ - iommufd BE: uses device fd and iommufd to setup secure context
|
||||
+ (bind_iommufd, attach_ioas)
|
||||
+ - vfio legacy BE: uses group fd and container fd to setup secure context
|
||||
+ (set_container, set_iommu)
|
||||
+
|
||||
+* Device access
|
||||
+
|
||||
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
|
||||
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
|
||||
+
|
||||
+* DMA Mapping flow
|
||||
+
|
||||
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
|
||||
+ 2. VFIO populates DMA map/unmap via the container BEs
|
||||
+ * iommufd BE: uses iommufd
|
||||
+ * vfio legacy BE: uses container fd
|
||||
+
|
||||
+Example configuration
|
||||
+=====================
|
||||
+
|
||||
+Step 1: configure the host device
|
||||
+---------------------------------
|
||||
+
|
||||
+It's exactly the same as for a VFIO device with the legacy VFIO container.
|
||||
+
|
||||
+Step 2: configure QEMU
|
||||
+----------------------
|
||||
+
|
||||
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
|
||||
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
|
||||
+
|
||||
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
|
||||
+be linked with an iommufd object. It gets a new optional property
|
||||
+named iommufd which allows passing an iommufd object. Take ``vfio-pci``
|
||||
+device for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||
+
|
||||
+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
|
||||
+management layer. In such a case the fd is passed; the ``fd`` property accepts a
|
||||
+string naming the fd or a number, for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0,fd=22
|
||||
+ -device vfio-pci,iommufd=iommufd0,fd=23
|
||||
+
|
||||
+If the ``fd`` property is not passed, the fd is opened by QEMU.
|
||||
+
|
||||
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
|
||||
+is not used and the user gets the behavior based on the legacy VFIO
|
||||
+container:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -device vfio-pci,host=0000:02:00.0
|
||||
+
|
||||
+Supported platform
|
||||
+==================
|
||||
+
|
||||
+Supports x86, ARM and s390x currently.
|
||||
+
|
||||
+Caveats
|
||||
+=======
|
||||
+
|
||||
+Dirty page sync
|
||||
+---------------
|
||||
+
|
||||
+Dirty page sync with the iommufd backend is not supported yet, so live migration is
|
||||
+disabled by default. It can be force-enabled as shown below, at low efficiency
|
||||
+though.
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
|
||||
+
|
||||
+P2P DMA
|
||||
+-------
|
||||
+
|
||||
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
|
||||
+BAR region yet. The warning below is shown for an assigned PCI device; it is not a bug.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
|
||||
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
|
||||
+
|
||||
+FD passing with mdev
|
||||
+--------------------
|
||||
+
|
||||
+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
|
||||
+If FD passing is used, there is no way to know that and the mdev is treated
|
||||
+like a real PCI device. There is an error as below if user wants to enable
|
||||
+RAM discarding for mdev.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
|
||||
+
|
||||
+``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue as their backend
|
||||
+devices are always mdev and RAM discarding is force enabled.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,98 @@
|
||||
From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:08 -0500
|
||||
Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Encourage the use of locking primitives and stop mentioning the
|
||||
AioContext lock since it is being removed.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-12-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 47 +++++++++++--------------------
|
||||
1 file changed, 16 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index a3e949f6b3..4865196bde 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in.
|
||||
|
||||
How to synchronize with an IOThread
|
||||
-----------------------------------
|
||||
-AioContext is not thread-safe so some rules must be followed when using file
|
||||
-descriptors, event notifiers, timers, or BHs across threads:
|
||||
+Variables that can be accessed by multiple threads require some form of
|
||||
+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc.
|
||||
|
||||
-1. AioContext functions can always be called safely. They handle their
|
||||
-own locking internally.
|
||||
-
|
||||
-2. Other threads wishing to access the AioContext must use
|
||||
-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
|
||||
-context is acquired no other thread can access it or run event loop iterations
|
||||
-in this AioContext.
|
||||
-
|
||||
-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
|
||||
-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
|
||||
-used in the block layer and can lead to hangs.
|
||||
-
|
||||
-There is currently no lock ordering rule if a thread needs to acquire multiple
|
||||
-AioContexts simultaneously. Therefore, it is only safe for code holding the
|
||||
-QEMU global mutex to acquire other AioContexts.
|
||||
+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(),
|
||||
+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger
|
||||
+activity in an IOThread.
|
||||
|
||||
Side note: the best way to schedule a function call across threads is to call
|
||||
-aio_bh_schedule_oneshot(). No acquire/release or locking is needed.
|
||||
+aio_bh_schedule_oneshot().
|
||||
+
|
||||
+The main loop thread can wait synchronously for a condition using
|
||||
+AIO_WAIT_WHILE().
|
||||
|
||||
AioContext and the block layer
|
||||
------------------------------
|
||||
@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using
|
||||
old APIs that implicitly use the main loop. See the "How to program for
|
||||
IOThreads" above for information on how to do that.
|
||||
|
||||
-If main loop code such as a QMP function wishes to access a BlockDriverState
|
||||
-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
|
||||
-that callbacks in the IOThread do not run in parallel.
|
||||
-
|
||||
Code running in the monitor typically needs to ensure that past
|
||||
requests from the guest are completed. When a block device is running
|
||||
in an IOThread, the IOThread can also process requests from the guest
|
||||
(via ioeventfd). To achieve both objects, wrap the code between
|
||||
bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
|
||||
-section". The functions must be called between aio_context_acquire()
|
||||
-and aio_context_release(). You can freely release and re-acquire the
|
||||
-AioContext within a drained section.
|
||||
-
|
||||
-Long-running jobs (usually in the form of coroutines) are best scheduled in
|
||||
-the BlockDriverState's AioContext to avoid the need to acquire/release around
|
||||
-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
|
||||
-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
|
||||
-can be used to get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
+section".
|
||||
+
|
||||
+Long-running jobs (usually in the form of coroutines) are often scheduled in
|
||||
+the BlockDriverState's AioContext. The functions
|
||||
+bdrv_add/remove_aio_context_notifier, or alternatively
|
||||
+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to
|
||||
+get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
BlockDriverState to a different AioContext.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,69 +0,0 @@
|
||||
From d032e43c4cebdbeb279d2da9b514fa50c6ed4da3 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:36:26 +0100
|
||||
Subject: [PATCH 2/3] dump: Add arch cleanup function
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 325: Fix problem that secure execution guest remains in "paused" state after dump failure
|
||||
RH-Jira: RHEL-16997
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/3] d70fcc72a69f65432f2fbfb7d864452ea37ec25d
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-16997
|
||||
|
||||
commit e72629e5149aba6f44122ea6d2a803ef136a0c6b
|
||||
Author: Janosch Frank <frankja@linux.ibm.com>
|
||||
Date: Thu Nov 9 12:04:42 2023 +0000
|
||||
|
||||
dump: Add arch cleanup function
|
||||
|
||||
Some architectures (s390x) need to cleanup after a failed dump to be
|
||||
able to continue to run the vm. Add a cleanup function pointer and
|
||||
call it if it's set.
|
||||
|
||||
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
dump/dump.c | 4 ++++
|
||||
include/sysemu/dump-arch.h | 1 +
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/dump/dump.c b/dump/dump.c
|
||||
index 1f1a6edcab..6a50e85f49 100644
|
||||
--- a/dump/dump.c
|
||||
+++ b/dump/dump.c
|
||||
@@ -96,6 +96,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
|
||||
|
||||
static int dump_cleanup(DumpState *s)
|
||||
{
|
||||
+ if (s->dump_info.arch_cleanup_fn) {
|
||||
+ s->dump_info.arch_cleanup_fn(s);
|
||||
+ }
|
||||
+
|
||||
guest_phys_blocks_free(&s->guest_phys_blocks);
|
||||
memory_mapping_list_free(&s->list);
|
||||
close(s->fd);
|
||||
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
|
||||
index 59bbc9be38..743916e46c 100644
|
||||
--- a/include/sysemu/dump-arch.h
|
||||
+++ b/include/sysemu/dump-arch.h
|
||||
@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo {
|
||||
void (*arch_sections_add_fn)(DumpState *s);
|
||||
uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
|
||||
int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
|
||||
+ void (*arch_cleanup_fn)(DumpState *s);
|
||||
} ArchDumpInfo;
|
||||
|
||||
struct GuestPhysBlockList; /* memory_mapping.h */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,153 +0,0 @@
|
||||
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:32 +0200
|
||||
Subject: [PATCH 02/21] graph-lock: Disable locking for now
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
|
||||
come from callers that hold an AioContext lock, which is not allowed
|
||||
during polling. In theory, we could temporarily release the lock, but
|
||||
callers are inconsistent about whether they hold a lock, and if they do,
|
||||
some are also confused about which one they hold. While all of this is
|
||||
fixable, it's not trivial, and the best course of action for 8.0.1 is
|
||||
probably just disabling the graph locking code temporarily.
|
||||
|
||||
We don't currently rely on graph locking yet. It is supposed to replace
|
||||
the AioContext lock eventually to enable multiqueue support, but as long
|
||||
as we still have the AioContext lock, it is sufficient without the graph
|
||||
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
|
||||
more either and this commit can be reverted. (Of course, it can also be
|
||||
reverted while the AioContext lock still exists if the callers have been
|
||||
fixed.)
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 259a7a0bde..2490926c90 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
|
||||
/* Protects the list of aiocontext and orphaned_reader_count */
|
||||
static QemuMutex aio_context_list_lock;
|
||||
|
||||
+#if 0
|
||||
/* Written and read with atomic operations. */
|
||||
static int has_writer;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* A reader coroutine could move from an AioContext to another.
|
||||
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
|
||||
g_free(ctx->bdrv_graph);
|
||||
}
|
||||
|
||||
+#if 0
|
||||
static uint32_t reader_count(void)
|
||||
{
|
||||
BdrvGraphRWlock *brdv_graph;
|
||||
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
|
||||
assert((int32_t)rd >= 0);
|
||||
return rd;
|
||||
}
|
||||
+#endif
|
||||
|
||||
void bdrv_graph_wrlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+ /*
|
||||
+ * TODO Some callers hold an AioContext lock when this is called, which
|
||||
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
|
||||
+ * AioContext locks are gone).
|
||||
+ */
|
||||
+#if 0
|
||||
assert(!qatomic_read(&has_writer));
|
||||
|
||||
/* Make sure that constantly arriving new I/O doesn't cause starvation */
|
||||
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
|
||||
} while (reader_count() >= 1);
|
||||
|
||||
bdrv_drain_all_end();
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_wrunlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+#if 0
|
||||
QEMU_LOCK_GUARD(&aio_context_list_lock);
|
||||
assert(qatomic_read(&has_writer));
|
||||
|
||||
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
|
||||
|
||||
/* Wake up all coroutine that are waiting to read the graph */
|
||||
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
{
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
{
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
if (qatomic_read(&has_writer)) {
|
||||
aio_wait_kick();
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_rdlock_main_loop(void)
|
||||
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
/* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
{
|
||||
assert(qemu_in_main_thread());
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
assert(qatomic_read(&has_writer));
|
||||
+#endif
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,94 @@
|
||||
From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001
|
||||
From: David Hildenbrand <david@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 14:55:53 +0100
|
||||
Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB
|
||||
alignment
|
||||
|
||||
RH-Author: David Hildenbrand <david@redhat.com>
|
||||
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
|
||||
RH-Jira: RHEL-20341
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825
|
||||
|
||||
https://issues.redhat.com/browse/RHEL-20341
|
||||
|
||||
Let's implement the get_min_alignment() callback for memory devices, and
|
||||
copy for the device memory region the alignment of the host memory
|
||||
region. This mimics what virtio-mem does, and allows for re-introducing
|
||||
proper alignment checks for the memory region size (where we don't care
|
||||
about additional device requirements) in memory device core.
|
||||
|
||||
Message-ID: <20240117135554.787344-2-david@redhat.com>
|
||||
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414)
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
---
|
||||
hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++----------------
|
||||
1 file changed, 21 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
|
||||
index 66f297c1d7..0829c495b0 100644
|
||||
--- a/hw/hyperv/hv-balloon.c
|
||||
+++ b/hw/hyperv/hv-balloon.c
|
||||
@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
|
||||
balloon->mr = g_new0(MemoryRegion, 1);
|
||||
memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
|
||||
memory_region_size(hostmem_mr));
|
||||
-
|
||||
- /*
|
||||
- * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
- * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
- * specify a larger maxmem eventually.
|
||||
- *
|
||||
- * The memory device core will warn the user in case maxmem might have to be
|
||||
- * increased and will fail plugging the device if there is not sufficient
|
||||
- * space after alignment.
|
||||
- *
|
||||
- * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
- * But this feels better, although the warning might be annoying. Maybe
|
||||
- * we can optimize that in the future (e.g., with such a device on the
|
||||
- * cmdline place/size the device memory region differently.
|
||||
- */
|
||||
- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
|
||||
+ balloon->mr->align = memory_region_get_alignment(hostmem_mr);
|
||||
}
|
||||
|
||||
static void hv_balloon_free_mr(HvBalloon *balloon)
|
||||
@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
|
||||
return balloon->mr;
|
||||
}
|
||||
|
||||
+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
|
||||
+{
|
||||
+ /*
|
||||
+ * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
+ * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
+ * specify a larger maxmem eventually.
|
||||
+ *
|
||||
+ * The memory device core will warn the user in case maxmem might have to be
|
||||
+ * increased and will fail plugging the device if there is not sufficient
|
||||
+ * space after alignment.
|
||||
+ *
|
||||
+ * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
+ * But this feels better, although the warning might be annoying. Maybe
|
||||
+ * we can optimize that in the future (e.g., with such a device on the
|
||||
+ * cmdline place/size the device memory region differently.
|
||||
+ */
|
||||
+ return 32 * GiB;
|
||||
+}
|
||||
+
|
||||
static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
|
||||
MemoryDeviceInfo *info)
|
||||
{
|
||||
@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data)
|
||||
mdc->get_memory_region = hv_balloon_md_get_memory_region;
|
||||
mdc->decide_memslots = hv_balloon_decide_memslots;
|
||||
mdc->get_memslots = hv_balloon_get_memslots;
|
||||
+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
|
||||
mdc->fill_device_info = hv_balloon_md_fill_device_info;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,40 +0,0 @@
|
||||
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Tue, 2 May 2023 15:51:53 +0530
|
||||
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
|
||||
version 7.6 and above
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
|
||||
|
||||
Please look at QEMU upstream commit
|
||||
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
|
||||
This patch adapts the above change so that it applies to RHEL pc machines of
|
||||
version 7.6 and newer. These are the machine types that are currently supported
|
||||
in RHEL. Q35 machines are not affected.
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 4d5880e249..6c7be628e1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
|
||||
pcmc->default_nic_model = "e1000";
|
||||
pcmc->pci_root_uid = 0;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
m->default_display = "std";
|
||||
m->no_parallel = 1;
|
||||
m->numa_mem_supported = true;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,101 +0,0 @@
|
||||
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Wed, 29 Mar 2023 10:27:26 +0530
|
||||
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
|
||||
older than version 2.3
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
|
||||
|
||||
i440fx machine versions 2.3 and newer support dynamic RAM
|
||||
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable").
|
||||
All currently supported q35 machine types (versions 2.4 and newer) support
|
||||
resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table
|
||||
size exceeds a pre-defined value does not apply to those machine versions.
|
||||
Add a check limiting the warning message to only those machines that do not
|
||||
support expandable RAM blocks (that is, i440fx machines with version 2.2
|
||||
and older).
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
|
||||
---
|
||||
hw/i386/acpi-build.c | 6 ++++--
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 9 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
|
||||
index ec857a117e..9bc4d8a981 100644
|
||||
--- a/hw/i386/acpi-build.c
|
||||
+++ b/hw/i386/acpi-build.c
|
||||
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
int legacy_table_size =
|
||||
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
|
||||
ACPI_BUILD_ALIGN_SIZE);
|
||||
- if (tables_blob->len > legacy_table_size) {
|
||||
+ if ((tables_blob->len > legacy_table_size) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
g_array_set_size(tables_blob, legacy_table_size);
|
||||
} else {
|
||||
/* Make sure we have a buffer in case we need to resize the tables. */
|
||||
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
|
||||
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index f216922cee..7db5a2348f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
pcmc->pvh_enabled = true;
|
||||
pcmc->kvmclock_create_always = true;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
assert(!mc->get_hotplug_handler);
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index fc704d783f..4d5880e249 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
|
||||
pcmc->rsdp_in_ram = false;
|
||||
+ pcmc->resizable_acpi_blob = false;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index d218ad1628..2f514d13d8 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -130,6 +130,9 @@ struct PCMachineClass {
|
||||
|
||||
/* create kvmclock device even when KVM PV features are not exposed */
|
||||
bool kvmclock_create_always;
|
||||
+
|
||||
+ /* resizable acpi blob compat */
|
||||
+ bool resizable_acpi_blob;
|
||||
};
|
||||
|
||||
#define TYPE_PC_MACHINE "generic-pc-machine"
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,42 @@
|
||||
From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:18 +0800
|
||||
Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
|
||||
index 3ada335a24..660f49db49 100644
|
||||
--- a/hw/arm/Kconfig
|
||||
+++ b/hw/arm/Kconfig
|
||||
@@ -8,6 +8,7 @@ config ARM_VIRT
|
||||
imply TPM_TIS_SYSBUS
|
||||
imply TPM_TIS_I2C
|
||||
imply NVDIMM
|
||||
+ imply IOMMUFD
|
||||
select ARM_GIC
|
||||
select ACPI
|
||||
select ARM_SMMUV3
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
|
||||
There are two NUMA-aware ARM machines: 'virt' and 'sbsa-ref'.
|
||||
Both of them are required to follow the cluster-NUMA-node boundary.
|
||||
Enable the validation to warn about irregular configurations where
|
||||
multiple CPUs in one cluster have been associated with different NUMA
|
||||
nodes.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/sbsa-ref.c | 2 ++
|
||||
hw/arm/virt.c | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
|
||||
index 0b93558dde..efb380e7c8 100644
|
||||
--- a/hw/arm/sbsa-ref.c
|
||||
+++ b/hw/arm/sbsa-ref.c
|
||||
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
|
||||
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
|
||||
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
}
|
||||
|
||||
static const TypeInfo sbsa_ref_info = {
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 9be53e9355..df6a0231bc 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,166 +0,0 @@
|
||||
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Tue, 25 Jul 2023 10:56:51 +0100
|
||||
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
The implementation of the SMMUv3 has multiple places where it reads a
|
||||
data structure from the guest and directly operates on it without
|
||||
doing a guest-to-host endianness conversion. Since all SMMU data
|
||||
structures are little-endian, this means that the SMMU doesn't work
|
||||
on a big-endian host. In particular, this causes the Avocado test
|
||||
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
|
||||
to fail on an s390x host.
|
||||
|
||||
Add appropriate byte-swapping on reads and writes of guest in-memory
|
||||
data structures so that the device works correctly on big-endian
|
||||
hosts.
|
||||
|
||||
As part of this we constrain queue_read() to operate only on Cmd
|
||||
structs and queue_write() on Evt structs, because in practice these
|
||||
are the only data structures the two functions are used with, and we
|
||||
need to know what the data structure is to be able to byte-swap its
|
||||
parts correctly.
|
||||
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
|
||||
Cc: qemu-stable@nongnu.org
|
||||
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/smmu-common.c | 3 +--
|
||||
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
|
||||
2 files changed, 32 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
|
||||
index e7f1c1f219..daa02ce798 100644
|
||||
--- a/hw/arm/smmu-common.c
|
||||
+++ b/hw/arm/smmu-common.c
|
||||
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
|
||||
dma_addr_t addr = baseaddr + index * sizeof(*pte);
|
||||
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
if (ret != MEMTX_OK) {
|
||||
info->type = SMMU_PTW_ERR_WALK_EABT;
|
||||
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
|
||||
index 270c80b665..cfb56725a6 100644
|
||||
--- a/hw/arm/smmuv3.c
|
||||
+++ b/hw/arm/smmuv3.c
|
||||
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
|
||||
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
|
||||
}
|
||||
|
||||
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
|
||||
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
|
||||
{
|
||||
dma_addr_t addr = Q_CONS_ENTRY(q);
|
||||
+ MemTxResult ret;
|
||||
+ int i;
|
||||
|
||||
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
|
||||
+ MEMTXATTRS_UNSPECIFIED);
|
||||
+ if (ret != MEMTX_OK) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
|
||||
+ le32_to_cpus(&cmd->word[i]);
|
||||
+ }
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-static MemTxResult queue_write(SMMUQueue *q, void *data)
|
||||
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
|
||||
{
|
||||
dma_addr_t addr = Q_PROD_ENTRY(q);
|
||||
MemTxResult ret;
|
||||
+ Evt evt = *evt_in;
|
||||
+ int i;
|
||||
|
||||
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
|
||||
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
|
||||
+ cpu_to_le32s(&evt.word[i]);
|
||||
+ }
|
||||
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
|
||||
MEMTXATTRS_UNSPECIFIED);
|
||||
if (ret != MEMTX_OK) {
|
||||
return ret;
|
||||
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
|
||||
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
SMMUEventInfo *event)
|
||||
{
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_ste(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
|
||||
}
|
||||
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
CD *buf, SMMUEventInfo *event)
|
||||
{
|
||||
dma_addr_t addr = STE_CTXPTR(ste);
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_cd(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
return -EINVAL;
|
||||
}
|
||||
if (s->features & SMMU_FEATURE_2LVL_STE) {
|
||||
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
|
||||
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
|
||||
dma_addr_t l1ptr, l2ptr;
|
||||
STEDesc l1std;
|
||||
|
||||
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
event->u.f_ste_fetch.addr = l1ptr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
|
||||
+ le32_to_cpus(&l1std.word[i]);
|
||||
+ }
|
||||
|
||||
span = L1STD_SPAN(&l1std);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,88 @@
|
||||
From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 9 Jan 2024 11:36:42 +1000
|
||||
Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory
|
||||
regions
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions
|
||||
RH-Jira: RHEL-19738
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4
|
||||
|
||||
Upstream: RHEL-only
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352
|
||||
|
||||
There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI
|
||||
ECAM and PCI MMIO. Each of them has a property introduced by upstream
|
||||
commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory
|
||||
regions") so that the corresponding high memory region can be disabled.
|
||||
|
||||
It's notable that another property ("compact-highmem") introduced by
|
||||
upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property")
|
||||
so that the compact high memory region layout during assignment can be
|
||||
disabled, compatible to the old machine types. However, we don't have
|
||||
the compatibility issue since the compact high memory region layout is
|
||||
always kept as disabled until RHEL9.2.0 machine type and onwards.
|
||||
|
||||
Expose those 3 properties: "highmem-redists", "highmem-ecam" and
|
||||
"highmem-mmio". The property "compact-highmem" is kept as hidden.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 5cab00b4cd..60f117f0d2 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_compact = value;
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_mmio = value;
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable using "
|
||||
"physical address space above 32 bits");
|
||||
|
||||
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||
+ virt_get_highmem_redists,
|
||||
+ virt_set_highmem_redists);
|
||||
+ object_class_property_set_description(oc, "highmem-redists",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for GICv3 or GICv4 "
|
||||
+ "redistributor");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||
+ virt_get_highmem_ecam,
|
||||
+ virt_set_highmem_ecam);
|
||||
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI ECAM");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||
+ virt_get_highmem_mmio,
|
||||
+ virt_set_highmem_mmio);
|
||||
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI MMIO");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,132 @@
|
||||
From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Wed, 3 Jan 2024 05:57:38 -0500
|
||||
Subject: [PATCH] hw/arm/virt: Fix compats
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 209: hw/arm/virt: Fix compats
|
||||
RH-Jira: RHEL-17168
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
arm_rhel_compat is not added for virt-rhel9.4.0 machine causing
|
||||
the efi-virtio.rom to be looked for when instantiating a virtio-net-pci
|
||||
device and it won't be found since not shipped on ARM. This is a
|
||||
regression compared to 9.2.
|
||||
|
||||
Actually we do not need any rom file for any virtio-net-pci variant
|
||||
because edk2 already brings the functionality. So for 9.4 onwards, we
|
||||
want to set romfiles to "" for all of them.
|
||||
|
||||
However at the moment we apply arm_rhel_compat from the latest
|
||||
rhel*_virt_options(). This is not aligned with the generic compat
|
||||
usage which sets compats for a given machine type to accommodate
|
||||
changes that occurred after its advent. Here we are somehow abusing
|
||||
the compat infra to set general driver options that should apply for
|
||||
all machines. On top of that this is really error prone and we have
|
||||
forgotten to add arm_rhel_compat several times in the past.
|
||||
|
||||
So let's introduce set_arm_rhel_compat() being called before any
|
||||
*virt_options in the non abstract machine class. That way the setting
|
||||
will apply to any machine type without any need to add it in any
|
||||
future machine types.
|
||||
|
||||
For < 9.4 machines we don't really care keeping non void romfiles
|
||||
for transitional and non transitional devices because anyway this was
|
||||
not working. So let's keep things simple and apply the new defaults for
|
||||
all RHEL9 machine types.
|
||||
|
||||
Finally, to follow the generic pattern we should set hw_compat_rhel_9_0
|
||||
in 9.0 machine as it is done on x86 or ccw. This has no consequence on
|
||||
aarch64 because it only contains x86 stuff but that helps understanding
|
||||
the consistency.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 43 +++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 29 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 0b17c94ad7..5cab00b4cd 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -111,11 +111,39 @@
|
||||
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
+/*
|
||||
+ * This variable is for changes to properties that are RHEL specific,
|
||||
+ * different to the current upstream and to be applied to the latest
|
||||
+ * machine type. They may be overridden by older machine compats.
|
||||
+ *
|
||||
+ * virtio-net-pci variant romfiles are not needed because edk2 does
|
||||
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
|
||||
+ * on rhel/aarch64.
|
||||
+ */
|
||||
+GlobalProperty arm_rhel_compat[] = {
|
||||
+ {"virtio-net-pci", "romfile", "" },
|
||||
+ {"virtio-net-pci-transitional", "romfile", "" },
|
||||
+ {"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+};
|
||||
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
+
|
||||
+/*
|
||||
+ * This cannot be called from the rhel_virt_class_init() because
|
||||
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
|
||||
+ * only is called on virt-rhelm.n.s non abstract class init.
|
||||
+ */
|
||||
+static void arm_rhel_compat_set(MachineClass *mc)
|
||||
+{
|
||||
+ compat_props_add(mc->compat_props, arm_rhel_compat,
|
||||
+ arm_rhel_compat_len);
|
||||
+}
|
||||
+
|
||||
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
|
||||
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
{ \
|
||||
MachineClass *mc = MACHINE_CLASS(oc); \
|
||||
+ arm_rhel_compat_set(mc); \
|
||||
rhel##m##n##s##_virt_options(mc); \
|
||||
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
|
||||
if (latest) { \
|
||||
@@ -139,19 +167,6 @@
|
||||
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
|
||||
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
|
||||
|
||||
-/* This variable is for changes to properties that are RHEL specific,
|
||||
- * different to the current upstream and to be applied to the latest
|
||||
- * machine type.
|
||||
- */
|
||||
-GlobalProperty arm_rhel_compat[] = {
|
||||
- {
|
||||
- .driver = "virtio-net-pci",
|
||||
- .property = "romfile",
|
||||
- .value = "",
|
||||
- },
|
||||
-};
|
||||
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
-
|
||||
/* Number of external interrupt lines to configure the GIC with */
|
||||
#define NUM_IRQS 256
|
||||
|
||||
@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
rhel940_virt_options(mc);
|
||||
|
||||
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc)
|
||||
rhel920_virt_options(mc);
|
||||
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
|
||||
RHEL machines
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
Upstream Status: RHEL only
|
||||
|
||||
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
|
||||
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
|
||||
A warning message will be printed if the boundary is broken.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index df6a0231bc..faf68488d5 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,41 @@
|
||||
From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001
|
||||
From: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Mon, 12 Feb 2024 10:37:54 +0100
|
||||
Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
|
||||
RH-Author: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
RH-Jira: RHEL-24988
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-24988
|
||||
Upstream: RHEL only
|
||||
|
||||
We do not plan to support any machine types prior to 9.4.0; leave them
|
||||
in, but mark as deprecated.
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 60f117f0d2..943c563391 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
+
|
||||
+ /* RHEL 9.4 is the first supported release */
|
||||
+ mc->deprecation_reason =
|
||||
+ "machine types for versions prior to 9.4 are deprecated";
|
||||
}
|
||||
DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,41 @@
|
||||
From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:20 +0800
|
||||
Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/i386/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
|
||||
index 55850791df..a1846be6f7 100644
|
||||
--- a/hw/i386/Kconfig
|
||||
+++ b/hw/i386/Kconfig
|
||||
@@ -95,6 +95,7 @@ config Q35
|
||||
imply E1000E_PCI_EXPRESS
|
||||
imply VMPORT
|
||||
imply VMMOUSE
|
||||
+ imply IOMMUFD
|
||||
select PC_PCI
|
||||
select PC_ACPI
|
||||
select PCI_EXPRESS_Q35
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,186 @@
|
||||
From ea2e2368dcf4140be47288472f2c2a094358e0c7 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Thu, 8 Feb 2024 23:03:45 +0100
|
||||
Subject: [PATCH 03/20] hw/i386/pc: Defer smbios_set_defaults() to machine_done
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [1/18] 9d4c1d1a910fec7d310429d6fc0b10c798932db7
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
|
||||
commit: a0204a5ed091dfe79aced7ec8f3ce1931fd25816
|
||||
Author: Bernhard Beschow <shentey@gmail.com>
|
||||
|
||||
Handling most of smbios data generation in the machine_done notifier is similar
|
||||
to how the ARM virt machine handles it which also calls smbios_set_defaults()
|
||||
there. The result is that all pc machines are freed from explicitly worrying
|
||||
about smbios setup.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240208220349.4948-6-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
|
||||
Conflicts: hw/i386/pc_q35.c, hw/i386/pc_piix.c
|
||||
due to missing 4d3457fef9 (hw/i386/pc: Merge pc_guest_info_init() into pc_machine_initfn())
|
||||
and different signature of smbios_set_defaults() downstream
|
||||
Fixup: hw/i386/fw_cfg.c to account for downstream changes smbios_set_defaults()
|
||||
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/i386/fw_cfg.c | 14 +++++++++++++-
|
||||
hw/i386/fw_cfg.h | 3 ++-
|
||||
hw/i386/pc.c | 2 +-
|
||||
hw/i386/pc_piix.c | 12 ------------
|
||||
hw/i386/pc_q35.c | 11 -----------
|
||||
include/hw/i386/pc.h | 1 -
|
||||
6 files changed, 16 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
|
||||
index 7362daa45a..6a5466faf0 100644
|
||||
--- a/hw/i386/fw_cfg.c
|
||||
+++ b/hw/i386/fw_cfg.c
|
||||
@@ -48,15 +48,27 @@ const char *fw_cfg_arch_key_name(uint16_t key)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg)
|
||||
+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg)
|
||||
{
|
||||
#ifdef CONFIG_SMBIOS
|
||||
uint8_t *smbios_tables, *smbios_anchor;
|
||||
size_t smbios_tables_len, smbios_anchor_len;
|
||||
struct smbios_phys_mem_area *mem_array;
|
||||
unsigned i, array_count;
|
||||
+ MachineState *ms = MACHINE(pcms);
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
|
||||
+ MachineClass *mc = MACHINE_GET_CLASS(pcms);
|
||||
X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
|
||||
|
||||
+ if (pcmc->smbios_defaults) {
|
||||
+ /* These values are guest ABI, do not change */
|
||||
+ smbios_set_defaults("QEMU", mc->desc, mc->name,
|
||||
+ pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded,
|
||||
+ pcmc->smbios_stream_product,
|
||||
+ pcmc->smbios_stream_version,
|
||||
+ pcms->smbios_entry_point_type);
|
||||
+ }
|
||||
+
|
||||
/* tell smbios about cpuid version and features */
|
||||
smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
|
||||
index 86ca7c1c0c..1e1de6b4a3 100644
|
||||
--- a/hw/i386/fw_cfg.h
|
||||
+++ b/hw/i386/fw_cfg.h
|
||||
@@ -10,6 +10,7 @@
|
||||
#define HW_I386_FW_CFG_H
|
||||
|
||||
#include "hw/boards.h"
|
||||
+#include "hw/i386/pc.h"
|
||||
#include "hw/nvram/fw_cfg.h"
|
||||
|
||||
#define FW_CFG_IO_BASE 0x510
|
||||
@@ -22,7 +23,7 @@
|
||||
FWCfgState *fw_cfg_arch_create(MachineState *ms,
|
||||
uint16_t boot_cpus,
|
||||
uint16_t apic_id_limit);
|
||||
-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
|
||||
+void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg);
|
||||
void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
|
||||
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index a1faa9e92c..16de2a59e8 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data)
|
||||
|
||||
acpi_setup();
|
||||
if (x86ms->fw_cfg) {
|
||||
- fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg);
|
||||
+ fw_cfg_build_smbios(pcms, x86ms->fw_cfg);
|
||||
fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
|
||||
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
|
||||
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 09d02cc91f..7344b35cf1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "hw/rtc/mc146818rtc.h"
|
||||
#include "hw/southbridge/piix.h"
|
||||
#include "hw/display/ramfb.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/pci/pci.h"
|
||||
#include "hw/pci/pci_ids.h"
|
||||
#include "hw/usb.h"
|
||||
@@ -233,17 +232,6 @@ static void pc_init1(MachineState *machine,
|
||||
|
||||
pc_guest_info_init(pcms);
|
||||
|
||||
- if (pcmc->smbios_defaults) {
|
||||
- MachineClass *mc = MACHINE_GET_CLASS(machine);
|
||||
- /* These values are guest ABI, do not change */
|
||||
- smbios_set_defaults("Red Hat", "KVM",
|
||||
- mc->desc, pcmc->smbios_legacy_mode,
|
||||
- pcmc->smbios_uuid_encoded,
|
||||
- pcmc->smbios_stream_product,
|
||||
- pcmc->smbios_stream_version,
|
||||
- pcms->smbios_entry_point_type);
|
||||
- }
|
||||
-
|
||||
/* allocate ram and load rom/bios */
|
||||
if (!xen_enabled()) {
|
||||
pc_memory_init(pcms, system_memory, rom_memory, hole64_size);
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index c6967e1846..9a22ff5dd6 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -45,7 +45,6 @@
|
||||
#include "hw/i386/amd_iommu.h"
|
||||
#include "hw/i386/intel_iommu.h"
|
||||
#include "hw/display/ramfb.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/ide/pci.h"
|
||||
#include "hw/ide/ahci.h"
|
||||
#include "hw/intc/ioapic.h"
|
||||
@@ -201,16 +200,6 @@ static void pc_q35_init(MachineState *machine)
|
||||
|
||||
pc_guest_info_init(pcms);
|
||||
|
||||
- if (pcmc->smbios_defaults) {
|
||||
- /* These values are guest ABI, do not change */
|
||||
- smbios_set_defaults("Red Hat", "KVM",
|
||||
- mc->desc, pcmc->smbios_legacy_mode,
|
||||
- pcmc->smbios_uuid_encoded,
|
||||
- pcmc->smbios_stream_product,
|
||||
- pcmc->smbios_stream_version,
|
||||
- pcms->smbios_entry_point_type);
|
||||
- }
|
||||
-
|
||||
/* create pci host bus */
|
||||
phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE));
|
||||
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 37644ede7e..c286c10bc3 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -12,7 +12,6 @@
|
||||
#include "hw/hotplug.h"
|
||||
#include "qom/object.h"
|
||||
#include "hw/i386/sgx-epc.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/cxl/cxl.h"
|
||||
|
||||
#define HPET_INTCAP "hpet-intcap"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 15:34:45 -0300
|
||||
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
|
||||
<= pc-q35-rhel9.2.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
|
||||
RH-Bugzilla: 2223691
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that sets off the property
|
||||
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
|
||||
live migrations to RHEL9.2 to happen successfully.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
|
||||
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5ea52317b9..6f5117669d 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,118 +0,0 @@
|
||||
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 2 May 2023 21:27:02 -0300
|
||||
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
|
||||
RH-Bugzilla: 2189423
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
Since its implementation in v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
|
||||
set for machine types < 8.0 will cause migration to fail if the target
|
||||
QEMU version is < 8.0.0 :
|
||||
|
||||
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
|
||||
qemu-system-x86_64: Failed to load PCIDevice:config
|
||||
qemu-system-x86_64: Failed to load e1000e:parent_obj
|
||||
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
|
||||
qemu-system-x86_64: load of migration failed: Invalid argument
|
||||
|
||||
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
|
||||
with this cmdline:
|
||||
|
||||
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
|
||||
|
||||
In order to fix this, property x-pcie-err-unc-mask was introduced to
|
||||
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
|
||||
default, but is disabled if machine type <= 7.2.
|
||||
|
||||
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
|
||||
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
|
||||
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 1 +
|
||||
hw/pci/pci.c | 2 ++
|
||||
hw/pci/pcie_aer.c | 11 +++++++----
|
||||
include/hw/pci/pci.h | 2 ++
|
||||
4 files changed, 12 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 0e0120b7f2..c28702b690 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
|
||||
{ "e1000e", "migrate-timadj", "off" },
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
|
||||
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
|
||||
index def5000e7b..8ad4349e96 100644
|
||||
--- a/hw/pci/pci.c
|
||||
+++ b/hw/pci/pci.c
|
||||
@@ -79,6 +79,8 @@ static Property pci_props[] = {
|
||||
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
|
||||
failover_pair_id),
|
||||
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
|
||||
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
|
||||
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
|
||||
DEFINE_PROP_END_OF_LIST()
|
||||
};
|
||||
|
||||
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
|
||||
index 103667c368..374d593ead 100644
|
||||
--- a/hw/pci/pcie_aer.c
|
||||
+++ b/hw/pci/pcie_aer.c
|
||||
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
|
||||
|
||||
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
|
||||
PCI_ERR_UNC_SUPPORTED);
|
||||
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_MASK_DEFAULT);
|
||||
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_SUPPORTED);
|
||||
+
|
||||
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
|
||||
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_MASK_DEFAULT);
|
||||
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_SUPPORTED);
|
||||
+ }
|
||||
|
||||
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
|
||||
PCI_ERR_UNC_SEVERITY_DEFAULT);
|
||||
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
|
||||
index d5a40cd058..6dc6742fc4 100644
|
||||
--- a/include/hw/pci/pci.h
|
||||
+++ b/include/hw/pci/pci.h
|
||||
@@ -207,6 +207,8 @@ enum {
|
||||
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
|
||||
#define QEMU_PCIE_CXL_BITNR 10
|
||||
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
|
||||
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
|
||||
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
|
||||
};
|
||||
|
||||
typedef struct PCIINTxRoute {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,116 @@
|
||||
From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 15:03:55 +0100
|
||||
Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm)
|
||||
|
||||
When the legacy and iommufd backends were introduced, a set of common
|
||||
vfio-pci routines were exported in pci.c for both backends to use:
|
||||
|
||||
vfio_pci_pre_reset
|
||||
vfio_pci_get_pci_hot_reset_info
|
||||
vfio_pci_host_match
|
||||
vfio_pci_post_reset
|
||||
|
||||
This introduced a build failure on PPC when --without-default-devices
|
||||
is used because VFIO is always selected in ppc/Kconfig but VFIO_PCI is
|
||||
not.
|
||||
|
||||
Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the
|
||||
VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with
|
||||
CONFIG_VFIO_PCI.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/ppc/Kconfig | 2 +-
|
||||
hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
|
||||
index 56f0475a8e..44263a58c4 100644
|
||||
--- a/hw/ppc/Kconfig
|
||||
+++ b/hw/ppc/Kconfig
|
||||
@@ -3,11 +3,11 @@ config PSERIES
|
||||
imply PCI_DEVICES
|
||||
imply TEST_DEVICES
|
||||
imply VIRTIO_VGA
|
||||
+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
|
||||
select NVDIMM
|
||||
select DIMM
|
||||
select PCI
|
||||
select SPAPR_VSCSI
|
||||
- select VFIO if LINUX # needed by spapr_pci_vfio.c
|
||||
select XICS
|
||||
select XIVE
|
||||
select MSI_NONBROKEN
|
||||
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
|
||||
index d1d07bec46..76b2a3487b 100644
|
||||
--- a/hw/ppc/spapr_pci_vfio.c
|
||||
+++ b/hw/ppc/spapr_pci_vfio.c
|
||||
@@ -26,10 +26,12 @@
|
||||
#include "hw/pci/pci_device.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
|
||||
|
||||
/*
|
||||
* Interfaces for IBM EEH (Enhanced Error Handling)
|
||||
*/
|
||||
+#ifdef CONFIG_VFIO_PCI
|
||||
static bool vfio_eeh_container_ok(VFIOContainer *container)
|
||||
{
|
||||
/*
|
||||
@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
|
||||
return RTAS_OUT_SUCCESS;
|
||||
}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+void spapr_phb_vfio_reset(DeviceState *qdev)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
|
||||
+ unsigned int addr, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_VFIO_PCI */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,470 +0,0 @@
|
||||
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
|
||||
qemu_bh_new_guarded
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:09 2023 -0400
|
||||
|
||||
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
|
||||
|
||||
This protects devices from bh->mmio reentrancy issues.
|
||||
|
||||
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Paul Durrant <paul@xen.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/9pfs/xen-9p-backend.c | 5 ++++-
|
||||
hw/block/dataplane/virtio-blk.c | 3 ++-
|
||||
hw/block/dataplane/xen-block.c | 5 +++--
|
||||
hw/char/virtio-serial-bus.c | 3 ++-
|
||||
hw/display/qxl.c | 9 ++++++---
|
||||
hw/display/virtio-gpu.c | 6 ++++--
|
||||
hw/ide/ahci.c | 3 ++-
|
||||
hw/ide/ahci_internal.h | 1 +
|
||||
hw/ide/core.c | 4 +++-
|
||||
hw/misc/imx_rngc.c | 6 ++++--
|
||||
hw/misc/macio/mac_dbdma.c | 2 +-
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/nvme/ctrl.c | 6 ++++--
|
||||
hw/scsi/mptsas.c | 3 ++-
|
||||
hw/scsi/scsi-bus.c | 3 ++-
|
||||
hw/scsi/vmw_pvscsi.c | 3 ++-
|
||||
hw/usb/dev-uas.c | 3 ++-
|
||||
hw/usb/hcd-dwc2.c | 3 ++-
|
||||
hw/usb/hcd-ehci.c | 3 ++-
|
||||
hw/usb/hcd-uhci.c | 2 +-
|
||||
hw/usb/host-libusb.c | 6 ++++--
|
||||
hw/usb/redirect.c | 6 ++++--
|
||||
hw/usb/xen-usb.c | 3 ++-
|
||||
hw/virtio/virtio-balloon.c | 5 +++--
|
||||
hw/virtio/virtio-crypto.c | 3 ++-
|
||||
25 files changed, 66 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
|
||||
index 74f3a05f88..0e266c552b 100644
|
||||
--- a/hw/9pfs/xen-9p-backend.c
|
||||
+++ b/hw/9pfs/xen-9p-backend.c
|
||||
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
|
||||
|
||||
int num_rings;
|
||||
Xen9pfsRing *rings;
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
} Xen9pfsDev;
|
||||
|
||||
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
|
||||
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
|
||||
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
|
||||
XEN_FLEX_RING_SIZE(ring_order);
|
||||
|
||||
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
|
||||
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
|
||||
+ &xen_9pdev->rings[i],
|
||||
+ &xen_9pdev->mem_reentrancy_guard);
|
||||
xen_9pdev->rings[i].out_cons = 0;
|
||||
xen_9pdev->rings[i].out_size = 0;
|
||||
xen_9pdev->rings[i].inprogress = false;
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index b28d81737e..a6202997ee 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
||||
} else {
|
||||
s->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
||||
|
||||
*dataplane = s;
|
||||
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
|
||||
index 734da42ea7..d8bc39d359 100644
|
||||
--- a/hw/block/dataplane/xen-block.c
|
||||
+++ b/hw/block/dataplane/xen-block.c
|
||||
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
|
||||
} else {
|
||||
dataplane->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
|
||||
- dataplane);
|
||||
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
|
||||
+ dataplane,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
|
||||
return dataplane;
|
||||
}
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index 7d4601cb5d..dd619f0731 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
|
||||
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
|
||||
index 80ce1e9a93..f1c0eb7dfc 100644
|
||||
--- a/hw/display/qxl.c
|
||||
+++ b/hw/display/qxl.c
|
||||
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
|
||||
|
||||
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
|
||||
|
||||
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
|
||||
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
qxl_reset_state(qxl);
|
||||
|
||||
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
|
||||
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
|
||||
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index 5e15c79b94..66ac9b6cc5 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
|
||||
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
|
||||
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
QTAILQ_INIT(&g->cmdq);
|
||||
QTAILQ_INIT(&g->fenceq);
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index 55902e1df7..4e76d6b191 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
|
||||
ahci_write_fis_d2h(ad);
|
||||
|
||||
if (ad->port_regs.cmd_issue && !ad->check_bh) {
|
||||
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
|
||||
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
|
||||
+ &ad->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(ad->check_bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
|
||||
index 303fcd7235..2480455372 100644
|
||||
--- a/hw/ide/ahci_internal.h
|
||||
+++ b/hw/ide/ahci_internal.h
|
||||
@@ -321,6 +321,7 @@ struct AHCIDevice {
|
||||
bool init_d2h_sent;
|
||||
AHCICmdHdr *cur_cmd;
|
||||
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct AHCIPCIState {
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 45d14a25e9..de48ff9f86 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
|
||||
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
|
||||
{
|
||||
IDEState *s = opaque;
|
||||
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
|
||||
TrimAIOCB *iocb;
|
||||
|
||||
/* Paired with a decrement in ide_trim_bh_cb() */
|
||||
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
|
||||
|
||||
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
|
||||
iocb->s = s;
|
||||
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
|
||||
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
iocb->ret = 0;
|
||||
iocb->qiov = qiov;
|
||||
iocb->i = -1;
|
||||
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
|
||||
index 632c03779c..082c6980ad 100644
|
||||
--- a/hw/misc/imx_rngc.c
|
||||
+++ b/hw/misc/imx_rngc.c
|
||||
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_mmio(sbd, &s->iomem);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
|
||||
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
|
||||
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void imx_rngc_reset(DeviceState *dev)
|
||||
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
|
||||
index 43bb1f56ba..80a789f32b 100644
|
||||
--- a/hw/misc/macio/mac_dbdma.c
|
||||
+++ b/hw/misc/macio/mac_dbdma.c
|
||||
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
DBDMAState *s = MAC_DBDMA(dev);
|
||||
|
||||
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 53e1c32643..447f669921 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
|
||||
virtio_net_handle_tx_bh);
|
||||
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
|
||||
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
n->vqs[index].tx_waiting = 0;
|
||||
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
|
||||
index ac24eeb5ed..e5a468975e 100644
|
||||
--- a/hw/nvme/ctrl.c
|
||||
+++ b/hw/nvme/ctrl.c
|
||||
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
|
||||
}
|
||||
|
||||
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
|
||||
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
|
||||
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
|
||||
|
||||
if (n->dbbuf_enabled) {
|
||||
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
|
||||
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
}
|
||||
}
|
||||
n->cq[cqid] = cq;
|
||||
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
|
||||
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
|
||||
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
|
||||
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
|
||||
index c485da792c..3de288b454 100644
|
||||
--- a/hw/scsi/mptsas.c
|
||||
+++ b/hw/scsi/mptsas.c
|
||||
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
}
|
||||
s->max_devices = MPTSAS_NUM_PORTS;
|
||||
|
||||
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
|
||||
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||
index c97176110c..3c20b47ad0 100644
|
||||
--- a/hw/scsi/scsi-bus.c
|
||||
+++ b/hw/scsi/scsi-bus.c
|
||||
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||
AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||
/* The reference is dropped in scsi_dma_restart_bh.*/
|
||||
object_ref(OBJECT(s));
|
||||
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(s->bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
|
||||
index fa76696855..4de34536e9 100644
|
||||
--- a/hw/scsi/vmw_pvscsi.c
|
||||
+++ b/hw/scsi/vmw_pvscsi.c
|
||||
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
|
||||
}
|
||||
|
||||
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
|
||||
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
|
||||
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
|
||||
/* override default SCSI bus hotplug-handler, with pvscsi's one */
|
||||
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
|
||||
index 88f99c05d5..f013ded91e 100644
|
||||
--- a/hw/usb/dev-uas.c
|
||||
+++ b/hw/usb/dev-uas.c
|
||||
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
QTAILQ_INIT(&uas->results);
|
||||
QTAILQ_INIT(&uas->requests);
|
||||
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
|
||||
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
|
||||
+ &d->mem_reentrancy_guard);
|
||||
|
||||
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
|
||||
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
|
||||
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
|
||||
index 8755e9cbb0..a0c4e782b2 100644
|
||||
--- a/hw/usb/hcd-dwc2.c
|
||||
+++ b/hw/usb/hcd-dwc2.c
|
||||
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
|
||||
s->fi = USB_FRMINTVL - 1;
|
||||
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
}
|
||||
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
|
||||
index d4da8dcb8d..c930c60921 100644
|
||||
--- a/hw/usb/hcd-ehci.c
|
||||
+++ b/hw/usb/hcd-ehci.c
|
||||
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
|
||||
}
|
||||
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
s->device = dev;
|
||||
|
||||
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
|
||||
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
|
||||
index 8ac1175ad2..77baaa7a6b 100644
|
||||
--- a/hw/usb/hcd-uhci.c
|
||||
+++ b/hw/usb/hcd-uhci.c
|
||||
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
|
||||
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
|
||||
}
|
||||
}
|
||||
- s->bh = qemu_bh_new(uhci_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
|
||||
s->num_ports_vmstate = NB_PORTS;
|
||||
QTAILQ_INIT(&s->queues);
|
||||
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
|
||||
index 176868d345..f500db85ab 100644
|
||||
--- a/hw/usb/host-libusb.c
|
||||
+++ b/hw/usb/host-libusb.c
|
||||
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
|
||||
static void usb_host_nodev(USBHostDevice *s)
|
||||
{
|
||||
if (!s->bh_nodev) {
|
||||
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
|
||||
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(s->bh_nodev);
|
||||
}
|
||||
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
|
||||
USBHostDevice *dev = opaque;
|
||||
|
||||
if (!dev->bh_postld) {
|
||||
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
|
||||
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(dev->bh_postld);
|
||||
dev->bh_postld_pending = true;
|
||||
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
|
||||
index fd7df599bc..39fbaaab16 100644
|
||||
--- a/hw/usb/redirect.c
|
||||
+++ b/hw/usb/redirect.c
|
||||
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
|
||||
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
|
||||
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
|
||||
|
||||
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
|
||||
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
|
||||
index 66cb3f7c24..38ee660a30 100644
|
||||
--- a/hw/usb/xen-usb.c
|
||||
+++ b/hw/usb/xen-usb.c
|
||||
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
|
||||
|
||||
QTAILQ_INIT(&usbif->req_free_q);
|
||||
QSIMPLEQ_INIT(&usbif->hotplug_q);
|
||||
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
|
||||
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static int usbback_free(struct XenLegacyDevice *xendev)
|
||||
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
|
||||
index 43092aa634..5186e831dd 100644
|
||||
--- a/hw/virtio/virtio-balloon.c
|
||||
+++ b/hw/virtio/virtio-balloon.c
|
||||
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
|
||||
precopy_add_notifier(&s->free_page_hint_notify);
|
||||
|
||||
object_ref(OBJECT(s->iothread));
|
||||
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
|
||||
- virtio_ballloon_get_free_page_hints, s);
|
||||
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
|
||||
+ virtio_ballloon_get_free_page_hints, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 802e1b9659..2fe804510f 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
|
||||
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,141 +0,0 @@
|
||||
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Mon, 29 May 2023 14:21:08 -0400
|
||||
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
|
||||
controller (CVE-2023-0330)
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit b987718bbb1d0eabf95499b976212dd5f0120d75
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon May 22 11:10:11 2023 +0200
|
||||
|
||||
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
|
||||
|
||||
We cannot use the generic reentrancy guard in the LSI code, so
|
||||
we have to manually prevent endless reentrancy here. The problematic
|
||||
lsi_execute_script() function has already a way to detect whether
|
||||
too many instructions have been executed - we just have to slightly
|
||||
change the logic here that it also takes into account if the function
|
||||
has been called too often in a reentrant way.
|
||||
|
||||
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
|
||||
patch by Mauro Matteo Cascella.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
|
||||
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 50 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 048436352b..f7d45b0b20 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
+ static int reentrancy_level;
|
||||
+
|
||||
+ reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- if (++insn_processed > LSI_MAX_INSN) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
+ /*
|
||||
+ * Some windows drivers make the device spin waiting for a memory location
|
||||
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
+ * assume this is the case and force an unexpected device disconnect. This
|
||||
+ * is apparently sufficient to beat the drivers into submission.
|
||||
+ *
|
||||
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
+ * trigger itself again and again. Avoid this problem by stopping after
|
||||
+ * being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
+ * which should be enough for all valid use cases).
|
||||
+ */
|
||||
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1596,6 +1605,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
+
|
||||
+ reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
index 2012bd54b7..1b55928b9f 100644
|
||||
--- a/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
@@ -8,6 +8,36 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "libqtest.h"
|
||||
|
||||
+/*
|
||||
+ * This used to trigger a DMA reentrancy issue
|
||||
+ * leading to memory corruption bugs like stack
|
||||
+ * overflow or use-after-free
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
+ */
|
||||
+static void test_lsi_dma_reentrancy(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 512M -nodefaults "
|
||||
+ "-blockdev driver=null-co,node-name=null0 "
|
||||
+ "-device lsi53c810 -device scsi-cd,drive=null0");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
|
||||
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
|
||||
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
|
||||
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
|
||||
+ qtest_writel(s, 0xff000000, 0xc0000024);
|
||||
+ qtest_writel(s, 0xff000114, 0x00000080);
|
||||
+ qtest_writel(s, 0xff00012c, 0xff000000);
|
||||
+ qtest_writel(s, 0xff000004, 0xff000114);
|
||||
+ qtest_writel(s, 0xff000008, 0xff100014);
|
||||
+ qtest_writel(s, 0xff10002f, 0x000000ff);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This used to trigger a UAF in lsi_do_msgout()
|
||||
* https://gitlab.com/qemu-project/qemu/-/issues/972
|
||||
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
|
||||
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
|
||||
test_lsi_do_msgout_cancel_req);
|
||||
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
|
||||
+ test_lsi_dma_reentrancy);
|
||||
+
|
||||
return g_test_run();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,73 @@
|
||||
From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Volker=20R=C3=BCmelin?= <vr_qemu@t-online.de>
|
||||
Date: Fri, 29 Dec 2023 21:38:54 +0100
|
||||
Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
introduced a global VFIODevice list, but forgot to update the list
|
||||
element field name when iterating over the new list. Change the code
|
||||
to use the correct list element field.
|
||||
|
||||
Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061
|
||||
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 0d4d8b8416..0b3352f2a9 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -73,7 +73,7 @@ bool vfio_mig_active(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration_blocker) {
|
||||
return false;
|
||||
}
|
||||
@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void)
|
||||
unsigned int device_num = 0;
|
||||
bool all_support_p2p = true;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration) {
|
||||
device_num++;
|
||||
|
||||
@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized) {
|
||||
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
|
||||
}
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized && vbasedev->needs_reset) {
|
||||
vbasedev->ops->vfio_hot_reset_multi(vbasedev);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,76 +0,0 @@
|
||||
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit 0ddcb39c9357
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Fri Jun 30 16:36:08 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
|
||||
Coverity reports a tainted scalar when traversing the capabilities
|
||||
chain (CID 1516589). In practice I've never seen a device with a
|
||||
chain so broken as to cause an issue, but it's also pretty easy to
|
||||
sanitize.
|
||||
|
||||
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index 0ed2fcd531..f4ff836805 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
.set = set_nv_gpudirect_clique_id,
|
||||
};
|
||||
|
||||
+static bool is_valid_std_cap_offset(uint8_t pos)
|
||||
+{
|
||||
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
|
||||
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
|
||||
+}
|
||||
+
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
*/
|
||||
ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
- if (ret != 1 || !tmp) {
|
||||
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
|
||||
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
d4_conflict = true;
|
||||
}
|
||||
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
- } while (tmp);
|
||||
+ } while (is_valid_std_cap_offset(tmp));
|
||||
|
||||
if (!c8_conflict) {
|
||||
pos = 0xC8;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,110 +0,0 @@
|
||||
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
|
||||
GPUDirect Cliques
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit f6b30c1984f7
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Thu Jun 8 12:05:07 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
|
||||
|
||||
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
|
||||
previously reserved for use by hypervisors to implement the GPUDirect
|
||||
Cliques capability. A revised specification provides an alternate
|
||||
location. Add a config space walk to the quirk to check for conflicts,
|
||||
allowing us to fall back to the new location or generate an error at the
|
||||
quirk setup rather than when the real conflicting capability is added
|
||||
should there be no available location.
|
||||
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index f0147a050a..0ed2fcd531 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
|
||||
* +---------------------------------+---------------------------------+
|
||||
*
|
||||
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
|
||||
+ *
|
||||
+ * Specification for Turning and later GPU architectures:
|
||||
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
|
||||
*/
|
||||
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
- int ret, pos = 0xC8;
|
||||
+ int ret, pos;
|
||||
+ bool c8_conflict = false, d4_conflict = false;
|
||||
+ uint8_t tmp;
|
||||
|
||||
if (vdev->nv_gpudirect_clique == 0xFF) {
|
||||
return 0;
|
||||
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Per the updated specification above, it's recommended to use offset
|
||||
+ * D4h for Turing and later GPU architectures due to a conflict of the
|
||||
+ * MSI-X capability at C8h. We don't know how to determine the GPU
|
||||
+ * architecture, instead we walk the capability chain to mark conflicts
|
||||
+ * and choose one or error based on the result.
|
||||
+ *
|
||||
+ * NB. Cap list head in pdev->config is already cleared, read from device.
|
||||
+ */
|
||||
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
+ vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
+ if (ret != 1 || !tmp) {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ if (tmp == 0xC8) {
|
||||
+ c8_conflict = true;
|
||||
+ } else if (tmp == 0xD4) {
|
||||
+ d4_conflict = true;
|
||||
+ }
|
||||
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
+ } while (tmp);
|
||||
+
|
||||
+ if (!c8_conflict) {
|
||||
+ pos = 0xC8;
|
||||
+ } else if (!d4_conflict) {
|
||||
+ pos = 0xD4;
|
||||
+ } else {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,62 +0,0 @@
|
||||
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 17 Jul 2023 18:21:26 +0200
|
||||
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
|
||||
virtio_iommu_handle_command()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
In the virtio_iommu_handle_command() when a PROBE request is handled,
|
||||
output_size takes a value greater than the tail size and on a subsequent
|
||||
iteration we can get a stack out-of-bounds access. Initialize the
|
||||
output_size on each iteration.
|
||||
|
||||
The issue was found with ASAN. Credits to:
|
||||
Yiming Tao(Zhejiang University)
|
||||
Gaoning Pan(Zhejiang University)
|
||||
|
||||
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
|
||||
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio-iommu.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
|
||||
index 421e2a944f..17ce630200 100644
|
||||
--- a/hw/virtio/virtio-iommu.c
|
||||
+++ b/hw/virtio/virtio-iommu.c
|
||||
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
||||
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
|
||||
struct virtio_iommu_req_head head;
|
||||
struct virtio_iommu_req_tail tail = {};
|
||||
- size_t output_size = sizeof(tail), sz;
|
||||
VirtQueueElement *elem;
|
||||
unsigned int iov_cnt;
|
||||
struct iovec *iov;
|
||||
void *buf = NULL;
|
||||
+ size_t sz;
|
||||
|
||||
for (;;) {
|
||||
+ size_t output_size = sizeof(tail);
|
||||
+
|
||||
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
|
||||
if (!elem) {
|
||||
return;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,52 +0,0 @@
|
||||
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:29:15 -0400
|
||||
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
|
||||
0x8000001F is set
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:30 2022 -0500
|
||||
|
||||
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
|
||||
|
||||
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
|
||||
associated with fields being set.
|
||||
|
||||
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 839706b430..4ac3046313 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
- *ebx = sev_get_cbit_position();
|
||||
- *ebx |= sev_get_reduced_phys_bits() << 6;
|
||||
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,77 +0,0 @@
|
||||
From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:22:55 -0400
|
||||
Subject: [PATCH 12/14] i386/sev: Update checks and information related to
|
||||
reduced-phys-bits
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:29 2022 -0500
|
||||
|
||||
i386/sev: Update checks and information related to reduced-phys-bits
|
||||
|
||||
The value of the reduced-phys-bits parameter is propagated to the CPUID
|
||||
information exposed to the guest. Update the current validation check to
|
||||
account for the size of the CPUID field (6-bits), ensuring the value is
|
||||
in the range of 1 to 63.
|
||||
|
||||
Maintain backward compatibility, to an extent, by allowing a value greater
|
||||
than 1 (so that the previously documented value of 5 still works), but not
|
||||
allowing anything over 63.
|
||||
|
||||
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 17 ++++++++++++++---
|
||||
1 file changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 859e06f6ad..fe2144c038 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
host_cbitpos = ebx & 0x3f;
|
||||
|
||||
+ /*
|
||||
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
|
||||
+ * register of CPUID 0x8000001F. No need to verify the range as the
|
||||
+ * comparison against the host value accomplishes that.
|
||||
+ */
|
||||
if (host_cbitpos != sev->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev->cbitpos);
|
||||
goto err;
|
||||
}
|
||||
|
||||
- if (sev->reduced_phys_bits < 1) {
|
||||
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
|
||||
- " requested '%d'", __func__, sev->reduced_phys_bits);
|
||||
+ /*
|
||||
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
|
||||
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
|
||||
+ * is in the range of 1 to 63.
|
||||
+ */
|
||||
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
|
||||
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
+ " it should be in the range of 1 to 63, requested '%d'",
|
||||
+ __func__, sev->reduced_phys_bits);
|
||||
goto err;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,54 @@
|
||||
From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 21:08:59 +0100
|
||||
Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init()
|
||||
mis-assignment
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions
|
||||
RH-Jira: RHEL-21570
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21570
|
||||
|
||||
commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503
|
||||
Author: Elen Avan <elen.avan@bk.ru>
|
||||
Date: Fri Dec 22 22:17:21 2023 +0300
|
||||
|
||||
include/ui/rect.h: fix qemu_rect_init() mis-assignment
|
||||
|
||||
Signed-off-by: Elen Avan <elen.avan@bk.ru>
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050
|
||||
Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test"
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
include/ui/rect.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/ui/rect.h b/include/ui/rect.h
|
||||
index 94898f92d0..68f05d78a8 100644
|
||||
--- a/include/ui/rect.h
|
||||
+++ b/include/ui/rect.h
|
||||
@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect,
|
||||
uint16_t width, uint16_t height)
|
||||
{
|
||||
rect->x = x;
|
||||
- rect->y = x;
|
||||
+ rect->y = y;
|
||||
rect->width = width;
|
||||
rect->height = height;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,102 +0,0 @@
|
||||
From 0306736e3afbe7be99d01e4d70d1a5f2e38c32c2 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 15 Aug 2023 00:08:55 +0000
|
||||
Subject: [PATCH] io: remove io watch if TLS channel is closed during handshake
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 315: io: remove io watch if TLS channel is closed during handshake
|
||||
RH-Bugzilla: 2216504
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] 5f23602074b2edde0d445d529f07434bd156202d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216504
|
||||
CVE: CVE-2023-3354
|
||||
Upstream: Merged
|
||||
|
||||
commit 10be627d2b5ec2d6b3dce045144aa739eef678b4
|
||||
Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Date: Tue Jun 20 09:45:34 2023 +0100
|
||||
|
||||
io: remove io watch if TLS channel is closed during handshake
|
||||
|
||||
The TLS handshake may take some time to complete, during which time an
|
||||
I/O watch might be registered with the main loop. If the owner of the
|
||||
I/O channel invokes qio_channel_close() while the handshake is waiting
|
||||
to continue the I/O watch must be removed. Failing to remove it will
|
||||
later trigger the completion callback which the owner is not expecting
|
||||
to receive. In the case of the VNC server, this results in a SEGV as
|
||||
vnc_disconnect_start() tries to shutdown a client connection that is
|
||||
already gone / NULL.
|
||||
|
||||
CVE-2023-3354
|
||||
Reported-by: jiangyegen <jiangyegen@huawei.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
include/io/channel-tls.h | 1 +
|
||||
io/channel-tls.c | 18 ++++++++++++------
|
||||
2 files changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h
|
||||
index 5672479e9e..26c67f17e2 100644
|
||||
--- a/include/io/channel-tls.h
|
||||
+++ b/include/io/channel-tls.h
|
||||
@@ -48,6 +48,7 @@ struct QIOChannelTLS {
|
||||
QIOChannel *master;
|
||||
QCryptoTLSSession *session;
|
||||
QIOChannelShutdown shutdown;
|
||||
+ guint hs_ioc_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
diff --git a/io/channel-tls.c b/io/channel-tls.c
|
||||
index 9805dd0a3f..847d5297c3 100644
|
||||
--- a/io/channel-tls.c
|
||||
+++ b/io/channel-tls.c
|
||||
@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
|
||||
}
|
||||
|
||||
trace_qio_channel_tls_handshake_pending(ioc, status);
|
||||
- qio_channel_add_watch_full(ioc->master,
|
||||
- condition,
|
||||
- qio_channel_tls_handshake_io,
|
||||
- data,
|
||||
- NULL,
|
||||
- context);
|
||||
+ ioc->hs_ioc_tag =
|
||||
+ qio_channel_add_watch_full(ioc->master,
|
||||
+ condition,
|
||||
+ qio_channel_tls_handshake_io,
|
||||
+ data,
|
||||
+ NULL,
|
||||
+ context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc,
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(
|
||||
qio_task_get_source(task));
|
||||
|
||||
+ tioc->hs_ioc_tag = 0;
|
||||
g_free(data);
|
||||
qio_channel_tls_handshake_task(tioc, task, context);
|
||||
|
||||
@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc,
|
||||
{
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
|
||||
|
||||
+ if (tioc->hs_ioc_tag) {
|
||||
+ g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
|
||||
+ }
|
||||
+
|
||||
return qio_channel_close(tioc->master, errp);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,133 @@
|
||||
From 65d58819ff7b012e43b5f1da1356b559d3f5a962 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 14 Mar 2024 17:58:25 +0100
|
||||
Subject: [PATCH 3/4] iotests: Add test for reset/AioContext switches with NBD
|
||||
exports
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 231: Fix deadlock and crash during storage migration
|
||||
RH-Jira: RHEL-28125
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [3/3] 7266acdf85271d157497bfe452aa6eeb58fbe7c6 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This replicates the scenario in which the bug was reported.
|
||||
Unfortunately this relies on actually executing a guest (so that the
|
||||
firmware initialises the virtio-blk device and moves it to its
|
||||
configured iothread), so this can't make use of the qtest accelerator
|
||||
like most other test cases. I tried to find a different easy way to
|
||||
trigger the bug, but couldn't find one.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20240314165825.40261-3-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e8fce34eccf68a32f4ecf2c6f121ff2ac383d6bf)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iothreads-nbd-export | 66 +++++++++++++++++++
|
||||
.../tests/iothreads-nbd-export.out | 19 ++++++
|
||||
2 files changed, 85 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iothreads-nbd-export
|
||||
create mode 100644 tests/qemu-iotests/tests/iothreads-nbd-export.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export b/tests/qemu-iotests/tests/iothreads-nbd-export
|
||||
new file mode 100755
|
||||
index 0000000000..037260729c
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-nbd-export
|
||||
@@ -0,0 +1,66 @@
|
||||
+#!/usr/bin/env python3
|
||||
+# group: rw quick
|
||||
+#
|
||||
+# Copyright (C) 2024 Red Hat, Inc.
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
|
||||
+
|
||||
+import time
|
||||
+import qemu
|
||||
+import iotests
|
||||
+
|
||||
+iotests.script_initialize(supported_fmts=['qcow2'],
|
||||
+ supported_platforms=['linux'])
|
||||
+
|
||||
+with iotests.FilePath('disk1.img') as path, \
|
||||
+ iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \
|
||||
+ qemu.machine.QEMUMachine(iotests.qemu_prog) as vm:
|
||||
+
|
||||
+ img_size = '10M'
|
||||
+
|
||||
+ iotests.log('Preparing disk...')
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size)
|
||||
+ vm.add_args('-blockdev', f'file,node-name=disk-file,filename={path}')
|
||||
+ vm.add_args('-blockdev', 'qcow2,node-name=disk,file=disk-file')
|
||||
+ vm.add_args('-object', 'iothread,id=iothread0')
|
||||
+ vm.add_args('-device',
|
||||
+ 'virtio-blk,drive=disk,iothread=iothread0,share-rw=on')
|
||||
+
|
||||
+ iotests.log('Launching VM...')
|
||||
+ vm.add_args('-accel', 'kvm', '-accel', 'tcg')
|
||||
+ #vm.add_args('-accel', 'qtest')
|
||||
+ vm.launch()
|
||||
+
|
||||
+ iotests.log('Exporting to NBD...')
|
||||
+ iotests.log(vm.qmp('nbd-server-start',
|
||||
+ addr={'type': 'unix', 'data': {'path': nbd_sock}}))
|
||||
+ iotests.log(vm.qmp('block-export-add', type='nbd', id='exp0',
|
||||
+ node_name='disk', writable=True))
|
||||
+
|
||||
+ iotests.log('Connecting qemu-img...')
|
||||
+ qemu_io = iotests.QemuIoInteractive('-f', 'raw',
|
||||
+ f'nbd+unix:///disk?socket={nbd_sock}')
|
||||
+
|
||||
+ iotests.log('Moving the NBD export to a different iothread...')
|
||||
+ for i in range(0, 10):
|
||||
+ iotests.log(vm.qmp('system_reset'))
|
||||
+ time.sleep(0.1)
|
||||
+
|
||||
+ iotests.log('Checking that it is still alive...')
|
||||
+ iotests.log(vm.qmp('query-status'))
|
||||
+
|
||||
+ qemu_io.close()
|
||||
+ vm.shutdown()
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export.out b/tests/qemu-iotests/tests/iothreads-nbd-export.out
|
||||
new file mode 100644
|
||||
index 0000000000..bc514e35e5
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-nbd-export.out
|
||||
@@ -0,0 +1,19 @@
|
||||
+Preparing disk...
|
||||
+Launching VM...
|
||||
+Exporting to NBD...
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+Connecting qemu-img...
|
||||
+Moving the NBD export to a different iothread...
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+Checking that it is still alive...
|
||||
+{"return": {"running": true, "status": "running"}}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,144 +0,0 @@
|
||||
From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:34 +0200
|
||||
Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This tests exercises graph locking, draining, and graph modifications
|
||||
with AioContext switches a lot. Amongst others, it serves as a
|
||||
regression test for bdrv_graph_wrlock() deadlocking because it is called
|
||||
with a locked AioContext and for AioContext handling in the NBD server.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-4-kwolf@redhat.com>
|
||||
Tested-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/iotests.py | 4 ++
|
||||
.../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++--
|
||||
.../tests/graph-changes-while-io.out | 4 +-
|
||||
3 files changed, 58 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
|
||||
index 3e82c634cf..7073579a7d 100644
|
||||
--- a/tests/qemu-iotests/iotests.py
|
||||
+++ b/tests/qemu-iotests/iotests.py
|
||||
@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
|
||||
assert self._qmp is not None
|
||||
return self._qmp.cmd(cmd, args)
|
||||
|
||||
+ def get_qmp(self) -> QEMUMonitorProtocol:
|
||||
+ assert self._qmp is not None
|
||||
+ return self._qmp
|
||||
+
|
||||
def stop(self, kill_signal=15):
|
||||
self._p.send_signal(kill_signal)
|
||||
self._p.wait()
|
||||
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
index 7664f33689..750e7d4d38 100755
|
||||
--- a/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
@@ -22,19 +22,19 @@
|
||||
import os
|
||||
from threading import Thread
|
||||
import iotests
|
||||
-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
|
||||
- QemuStorageDaemon
|
||||
+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
|
||||
+ QMPTestCase, QemuStorageDaemon
|
||||
|
||||
|
||||
top = os.path.join(iotests.test_dir, 'top.img')
|
||||
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
|
||||
|
||||
|
||||
-def do_qemu_img_bench() -> None:
|
||||
+def do_qemu_img_bench(count: int = 2000000) -> None:
|
||||
"""
|
||||
Do some I/O requests on `nbd_sock`.
|
||||
"""
|
||||
- qemu_img('bench', '-f', 'raw', '-c', '2000000',
|
||||
+ qemu_img('bench', '-f', 'raw', '-c', str(count),
|
||||
f'nbd+unix:///node0?socket={nbd_sock}')
|
||||
|
||||
|
||||
@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):
|
||||
|
||||
bench_thr.join()
|
||||
|
||||
+ def test_commit_while_io(self) -> None:
|
||||
+ # Run qemu-img bench in the background
|
||||
+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
|
||||
+ bench_thr.start()
|
||||
+
|
||||
+ qemu_io('-c', 'write 0 64k', top)
|
||||
+ qemu_io('-c', 'write 128k 64k', top)
|
||||
+
|
||||
+ result = self.qsd.qmp('blockdev-add', {
|
||||
+ 'driver': imgfmt,
|
||||
+ 'node-name': 'overlay',
|
||||
+ 'backing': None,
|
||||
+ 'file': {
|
||||
+ 'driver': 'file',
|
||||
+ 'filename': top
|
||||
+ }
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ result = self.qsd.qmp('blockdev-snapshot', {
|
||||
+ 'node': 'node0',
|
||||
+ 'overlay': 'overlay',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ # While qemu-img bench is running, repeatedly commit overlay to node0
|
||||
+ while bench_thr.is_alive():
|
||||
+ result = self.qsd.qmp('block-commit', {
|
||||
+ 'job-id': 'job0',
|
||||
+ 'device': 'overlay',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ result = self.qsd.qmp('block-job-cancel', {
|
||||
+ 'device': 'job0',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ cancelled = False
|
||||
+ while not cancelled:
|
||||
+ for event in self.qsd.get_qmp().get_events(wait=10.0):
|
||||
+ if event['event'] != 'JOB_STATUS_CHANGE':
|
||||
+ continue
|
||||
+ if event['data']['status'] == 'null':
|
||||
+ cancelled = True
|
||||
+
|
||||
+ bench_thr.join()
|
||||
+
|
||||
if __name__ == '__main__':
|
||||
# Format must support raw backing files
|
||||
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
|
||||
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
index ae1213e6f8..fbc63e62f8 100644
|
||||
--- a/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
@@ -1,5 +1,5 @@
|
||||
-.
|
||||
+..
|
||||
----------------------------------------------------------------------
|
||||
-Ran 1 tests
|
||||
+Ran 2 tests
|
||||
|
||||
OK
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,132 +0,0 @@
|
||||
From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Tue, 9 May 2023 15:41:33 +0200
|
||||
Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This tests that trying to resize an image with QMP block_resize doesn't
|
||||
hang or otherwise fail when the image is attached to a device running in
|
||||
an iothread.
|
||||
|
||||
This is a regression test for the recent fix that changed
|
||||
qmp_block_resize, which is a coroutine based QMP handler, to avoid
|
||||
calling no_coroutine_fns directly.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230509134133.373408-1-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++
|
||||
tests/qemu-iotests/tests/iothreads-resize.out | 11 +++
|
||||
2 files changed, 82 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iothreads-resize
|
||||
create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize
|
||||
new file mode 100755
|
||||
index 0000000000..36e4598c62
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-resize
|
||||
@@ -0,0 +1,71 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw auto quick
|
||||
+#
|
||||
+# Test resizing an image that is attached to a separate iothread
|
||||
+#
|
||||
+# Copyright (C) 2023 Red Hat, Inc.
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+# creator
|
||||
+owner=kwolf@redhat.com
|
||||
+
|
||||
+seq=`basename $0`
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+# Resizing images is only supported by a few block drivers
|
||||
+_supported_fmt raw qcow2 qed
|
||||
+_supported_proto file
|
||||
+_require_devices virtio-scsi-pci
|
||||
+
|
||||
+size=64M
|
||||
+_make_test_img $size
|
||||
+
|
||||
+qmp() {
|
||||
+cat <<EOF
|
||||
+{"execute":"qmp_capabilities"}
|
||||
+{'execute': 'block_resize',
|
||||
+ 'arguments': {'node-name': 'img', 'size': 134217728}}
|
||||
+{"execute":"quit"}
|
||||
+EOF
|
||||
+}
|
||||
+
|
||||
+qmp | $QEMU -S -display none \
|
||||
+ -drive if=none,format=$IMGFMT,file="$TEST_IMG",node-name=img \
|
||||
+ -object iothread,id=t0 \
|
||||
+ -device virtio-scsi-pci,iothread=t0 \
|
||||
+ -device scsi-hd,drive=none0 \
|
||||
+ -qmp stdio \
|
||||
+ | _filter_qmp
|
||||
+
|
||||
+_img_info | _filter_img_info
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out
|
||||
new file mode 100644
|
||||
index 0000000000..2ca5a9d964
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-resize.out
|
||||
@@ -0,0 +1,11 @@
|
||||
+QA output created by iothreads-resize
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
|
||||
+QMP_VERSION
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
|
||||
+image: TEST_DIR/t.IMGFMT
|
||||
+file format: IMGFMT
|
||||
+virtual size: 128 MiB (134217728 bytes)
|
||||
+*** done
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 21d1d93abd48f613c18df7dacd986693da774175 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 11 May 2023 13:03:22 +0200
|
||||
Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not
|
||||
deprecated in RHEL
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that is necessary because the default
|
||||
CPU in RHEL is marked as deprecated. This makes test cases fail due to
|
||||
the warning in the output:
|
||||
|
||||
qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'
|
||||
|
||||
Fixes: 318178778db60b6475d1484509bee136317156d3
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/testenv.py | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
|
||||
index 9a37ad9152..963514aab3 100644
|
||||
--- a/tests/qemu-iotests/testenv.py
|
||||
+++ b/tests/qemu-iotests/testenv.py
|
||||
@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str,
|
||||
if self.qemu_prog.endswith(f'qemu-system-{suffix}'):
|
||||
self.qemu_options += f' -machine {machine}'
|
||||
|
||||
+ if self.qemu_prog.endswith('qemu-system-x86_64'):
|
||||
+ self.qemu_options += ' -cpu Nehalem'
|
||||
+
|
||||
# QEMU_DEFAULT_MACHINE
|
||||
self.qemu_default_machine = get_default_machine(self.qemu_prog)
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,49 @@
|
||||
From a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 18 Jan 2024 09:48:21 -0500
|
||||
Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids()
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
|
||||
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Add a filter function for QMP responses that contain QEMU's
|
||||
automatically generated node ids. The ids change between runs and must
|
||||
be masked in the reference output.
|
||||
|
||||
The next commit will use this new function.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240118144823.1497953-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/iotests.py | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
|
||||
index e5c5798c71..ea48af4a7b 100644
|
||||
--- a/tests/qemu-iotests/iotests.py
|
||||
+++ b/tests/qemu-iotests/iotests.py
|
||||
@@ -651,6 +651,13 @@ def _filter(_key, value):
|
||||
def filter_generated_node_ids(msg):
|
||||
return re.sub("#block[0-9]+", "NODE_NAME", msg)
|
||||
|
||||
+def filter_qmp_generated_node_ids(qmsg):
|
||||
+ def _filter(_key, value):
|
||||
+ if is_str(value):
|
||||
+ return filter_generated_node_ids(value)
|
||||
+ return value
|
||||
+ return filter_qmp(qmsg, _filter)
|
||||
+
|
||||
def filter_img_info(output: str, filename: str,
|
||||
drop_child_info: bool = True) -> str:
|
||||
lines = []
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,49 @@
|
||||
From 453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 25 Jan 2024 16:21:50 +0100
|
||||
Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
|
||||
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but
|
||||
in older versions it's not. We really have to catch asyncio.TimeoutError
|
||||
here, otherwise a slow test run will fail (as has happened multiple
|
||||
times on CI recently).
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20240125152150.42389-1-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iothreads-stream | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream
|
||||
index 503f221f16..231195b5e8 100755
|
||||
--- a/tests/qemu-iotests/tests/iothreads-stream
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-stream
|
||||
@@ -18,6 +18,7 @@
|
||||
#
|
||||
# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
+import asyncio
|
||||
import iotests
|
||||
|
||||
iotests.script_initialize(supported_fmts=['qcow2'],
|
||||
@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \
|
||||
# The test is done once both jobs are gone
|
||||
if finished == 2:
|
||||
break
|
||||
- except TimeoutError:
|
||||
+ except asyncio.TimeoutError:
|
||||
pass
|
||||
vm.cmd('query-jobs')
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,186 +0,0 @@
|
||||
From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:18 +0200
|
||||
Subject: [PATCH 4/9] iotests/iov-padding: New test
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Test that even vectored IO requests with 1024 vector elements that are
|
||||
not aligned to the device's request alignment will succeed.
|
||||
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
|
||||
(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
|
||||
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
|
||||
2 files changed, 144 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iov-padding
|
||||
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
|
||||
new file mode 100755
|
||||
index 0000000000..b9604900c7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding
|
||||
@@ -0,0 +1,85 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw quick
|
||||
+#
|
||||
+# Check the interaction of request padding (to fit alignment restrictions) with
|
||||
+# vectored I/O from the guest
|
||||
+#
|
||||
+# Copyright Red Hat
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+seq=$(basename $0)
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+_supported_fmt raw
|
||||
+_supported_proto file
|
||||
+
|
||||
+_make_test_img 1M
|
||||
+
|
||||
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
|
||||
+
|
||||
+# Four combinations:
|
||||
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
|
||||
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
|
||||
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
|
||||
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
|
||||
+for start_offset in 4096 512; do
|
||||
+ for last_element_length in 512 4096; do
|
||||
+ length=$((1023 * 512 + $last_element_length))
|
||||
+
|
||||
+ echo
|
||||
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
|
||||
+
|
||||
+ # Fill with data for testing
|
||||
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+ # 1023 512-byte buffers, and then one with length $last_element_length
|
||||
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "writev $cmd_params" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+
|
||||
+ # Read all patterns -- read the part we just wrote with writev twice,
|
||||
+ # once "normally", and once with a readv, so we see that that works, too
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "read -P 1 0 $start_offset" \
|
||||
+ -c "read -P 2 $start_offset $length" \
|
||||
+ -c "readv $cmd_params" \
|
||||
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+ done
|
||||
+done
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
|
||||
new file mode 100644
|
||||
index 0000000000..e07a91fac7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding.out
|
||||
@@ -0,0 +1,59 @@
|
||||
+QA output created by iov-padding
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 516608/516608 bytes at offset 531968
|
||||
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 523776/523776 bytes at offset 524800
|
||||
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+*** done
|
||||
--
|
||||
2.39.3
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue