forked from rpms/qemu-kvm
parent
83a513bac2
commit
d2fffb6dfb
@ -1 +1 @@
|
||||
SOURCES/qemu-8.0.0.tar.xz
|
||||
SOURCES/qemu-8.2.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz
|
||||
1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz
|
||||
|
@ -0,0 +1,44 @@
|
||||
From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Thu, 14 Dec 2023 04:42:01 -0500
|
||||
Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-17168
|
||||
|
||||
Adding new machine type to support enabling new features.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index c541efee5e..0b17c94ad7 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void)
|
||||
}
|
||||
type_init(rhel_machine_init);
|
||||
|
||||
+static void rhel940_virt_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
+
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ rhel940_virt_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
+DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
static void rhel900_virt_options(MachineClass *mc)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
|
||||
From: Kfir Manor <kfir@daynix.com>
|
||||
Date: Sun, 22 Jan 2023 17:33:07 +0200
|
||||
Subject: qga/linux: add usb support to guest-get-fsinfo
|
||||
|
||||
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
|
||||
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
|
||||
RH-Bugzilla: 2149191
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: yvugenfi <None>
|
||||
RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191
|
||||
Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/
|
||||
|
||||
Signed-off-by: Kfir Manor <kfir@daynix.com>
|
||||
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
|
||||
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
|
||||
Patch-id: 72
|
||||
Patch-present-in-specfile: True
|
||||
---
|
||||
qga/commands-posix.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
|
||||
index 079689d79a..97754930c1 100644
|
||||
--- a/qga/commands-posix.c
|
||||
+++ b/qga/commands-posix.c
|
||||
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
g_str_equal(driver, "sym53c8xx") ||
|
||||
g_str_equal(driver, "virtio-pci") ||
|
||||
g_str_equal(driver, "ahci") ||
|
||||
- g_str_equal(driver, "nvme"))) {
|
||||
+ g_str_equal(driver, "nvme") ||
|
||||
+ g_str_equal(driver, "xhci_hcd") ||
|
||||
+ g_str_equal(driver, "ehci-pci"))) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
}
|
||||
} else if (strcmp(driver, "nvme") == 0) {
|
||||
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
|
||||
+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
|
||||
+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
|
||||
} else {
|
||||
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
|
||||
goto cleanup;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,110 +0,0 @@
|
||||
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 15 Feb 2023 02:03:17 -0500
|
||||
Subject: Add RHEL 9.2.0 compat structure
|
||||
|
||||
Adding compatibility bits necessary to keep 9.2.0 machine
|
||||
types same after rebase to 8.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
Rebase notes (8.0.0 rc4):
|
||||
- Added migration.x-preempt-pre-7-2 compat
|
||||
---
|
||||
hw/arm/virt.c | 1 +
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 20 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 1ae1654be5..9be53e9355 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5aa567fad3..0e0120b7f2 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "e1000e", "migrate-timadj", "off" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "virtio-mem", "x-early-migration", "false" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "migration", "x-preempt-pre-7-2", "true" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
+
|
||||
/*
|
||||
* Mostly the same as hw_compat_7_0
|
||||
*/
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 3e330fd36f..90fb6e2e03 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 98601bb76f..8945b69175 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.2.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index dcd3b966b0..6a0b93c63d 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 5e7446ee40..5f08bd7550 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_2[];
|
||||
+extern const size_t hw_compat_rhel_9_2_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_1[];
|
||||
extern const size_t hw_compat_rhel_9_1_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,76 +0,0 @@
|
||||
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 27 Mar 2023 15:14:03 +0200
|
||||
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
|
||||
8.0.0 update
|
||||
|
||||
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 2 ++
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 13 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 8abb1f872e..f216922cee 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
|
||||
};
|
||||
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
|
||||
+GlobalProperty pc_rhel_9_2_compat[] = {
|
||||
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
|
||||
+ { "ICH9-LPC", "noreboot", "true" },
|
||||
+};
|
||||
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
|
||||
+
|
||||
GlobalProperty pc_rhel_9_0_compat[] = {
|
||||
/* pc_rhel_9_0_compat from pc_compat_6_2 */
|
||||
{ "virtio-mem", "unplugged-inaccessible", "off" },
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 90fb6e2e03..fc704d783f 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 8945b69175..e97655616a 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 4376f64a47..d218ad1628 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
|
||||
extern GlobalProperty pc_rhel_compat[];
|
||||
extern const size_t pc_rhel_compat_len;
|
||||
|
||||
+extern GlobalProperty pc_rhel_9_2_compat[];
|
||||
+extern const size_t pc_rhel_9_2_compat_len;
|
||||
+
|
||||
extern GlobalProperty pc_rhel_9_0_compat[];
|
||||
extern const size_t pc_rhel_9_0_compat_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,83 +0,0 @@
|
||||
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Mon, 17 Apr 2023 01:24:18 -0400
|
||||
Subject: Disable unwanted new devices
|
||||
|
||||
QEMU 8.0 adds two new devices we do not want to support that can't
|
||||
be disabled using configure switch.
|
||||
|
||||
1) ide-cf - virtual CompactFlash card
|
||||
|
||||
2) i2c-echo - testing echo device
|
||||
|
||||
Use manual disabling of the device by changing code (1) and meson configs (2).
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/ide/qdev.c | 9 +++++++++
|
||||
hw/misc/meson.build | 3 ++-
|
||||
2 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
|
||||
index 1b3b4da01d..454bfa5783 100644
|
||||
--- a/hw/ide/qdev.c
|
||||
+++ b/hw/ide/qdev.c
|
||||
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
|
||||
ide_dev_initfn(dev, IDE_CD, errp);
|
||||
}
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static void ide_cf_realize(IDEDevice *dev, Error **errp)
|
||||
{
|
||||
ide_dev_initfn(dev, IDE_CFATA, errp);
|
||||
}
|
||||
+#endif
|
||||
|
||||
#define DEFINE_IDE_DEV_PROPERTIES() \
|
||||
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
|
||||
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
|
||||
.class_init = ide_cd_class_init,
|
||||
};
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static Property ide_cf_properties[] = {
|
||||
DEFINE_IDE_DEV_PROPERTIES(),
|
||||
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
|
||||
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
|
||||
.instance_size = sizeof(IDEDrive),
|
||||
.class_init = ide_cf_class_init,
|
||||
};
|
||||
+#endif
|
||||
|
||||
static void ide_device_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
@@ -396,7 +402,10 @@ static void ide_register_types(void)
|
||||
type_register_static(&ide_bus_info);
|
||||
type_register_static(&ide_hd_info);
|
||||
type_register_static(&ide_cd_info);
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
type_register_static(&ide_cf_info);
|
||||
+#endif
|
||||
type_register_static(&ide_device_type_info);
|
||||
}
|
||||
|
||||
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
|
||||
index a40245ad44..9cc5a61ed7 100644
|
||||
--- a/hw/misc/meson.build
|
||||
+++ b/hw/misc/meson.build
|
||||
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
|
||||
|
||||
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
|
||||
|
||||
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
+# Disabled for Red Hat Enterprise Linux
|
||||
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
|
||||
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,37 @@
|
||||
From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:26:19 -0500
|
||||
Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on aarch64 to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
index aec1831199..b0191d3c69 100644
|
||||
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
@@ -39,3 +39,4 @@ CONFIG_PXB=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,37 @@
|
||||
From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:33:17 -0500
|
||||
Subject: [PATCH 067/101] Compile IOMMUFD on s390x
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on s390x to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
index 69a799adbd..24cf6dbd03 100644
|
||||
--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,37 @@
|
||||
From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:34:44 -0500
|
||||
Subject: [PATCH 068/101] Compile IOMMUFD on x86_64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on x86_64 to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
index ce5be73633..ba41108e0c 100644
|
||||
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
@@ -108,3 +108,4 @@ CONFIG_SGX=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,84 +0,0 @@
|
||||
From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Tue, 18 Apr 2023 11:04:49 +0200
|
||||
Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests
|
||||
RH-Bugzilla: 2087047
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Acked-by: Julia Suvorova <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam)
|
||||
|
||||
with Q35 using ACPI PCI hotplug by default, user's request to unplug
|
||||
device is ignored when it's issued before guest OS has been booted.
|
||||
And any additional attempt to request device hot-unplug afterwards
|
||||
results in following error:
|
||||
|
||||
"Device XYZ is already in the process of unplug"
|
||||
|
||||
arguably it can be considered as a regression introduced by [2],
|
||||
before which it was possible to issue unplug request multiple
|
||||
times.
|
||||
|
||||
Accept new unplug requests after timeout (1ms). This brings ACPI PCI
|
||||
hotplug on par with native PCIe unplug behavior [1] and allows user
|
||||
to repeat unplug requests at proper times.
|
||||
Set expire timeout to arbitrary 1msec so user won't be able to
|
||||
flood guest with SCI interrupts by calling device_del in tight loop.
|
||||
|
||||
PS:
|
||||
ACPI spec doesn't mandate what OSPM can do with GPEx.status
|
||||
bits set before it's booted => it's implementation dependent.
|
||||
Status bits may be retained (I tested with one Windows version)
|
||||
or cleared (Linux since 2.6 kernel times) during guest's ACPI
|
||||
subsystem initialization.
|
||||
Clearing status bits (though not wrong per se) hides the unplug
|
||||
event from guest, and it's up to the user to repeat device_del later
|
||||
when guest is able to handle unplug requests.
|
||||
|
||||
1) 18416c62e3 ("pcie: expire pending delete")
|
||||
2)
|
||||
Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del")
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
CC: mst@redhat.com
|
||||
CC: anisinha@redhat.com
|
||||
CC: jusual@redhat.com
|
||||
CC: kraxel@redhat.com
|
||||
Message-Id: <20230418090449.2155757-1-imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Ani Sinha <anisinha@redhat.com>
|
||||
(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d)
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/acpi/pcihp.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
|
||||
index dcfb779a7a..cdd6f775a1 100644
|
||||
--- a/hw/acpi/pcihp.c
|
||||
+++ b/hw/acpi/pcihp.c
|
||||
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
* acpi_pcihp_eject_slot() when the operation is completed.
|
||||
*/
|
||||
pdev->qdev.pending_deleted_event = true;
|
||||
+ /* if unplug was requested before OSPM is initialized,
|
||||
+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively
|
||||
+ * hides unplug event. And than followup qmp_device_del() calls remain
|
||||
+ * blocked by above flag permanently.
|
||||
+ * Unblock qmp_device_del() by setting expire limit, so user can
|
||||
+ * repeat unplug request later when OSPM has been booted.
|
||||
+ */
|
||||
+ pdev->qdev.pending_deleted_expires_ms =
|
||||
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
|
||||
+
|
||||
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
|
||||
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,60 @@
|
||||
From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:01 -0500
|
||||
Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release()
|
||||
a no-op
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
aio_context_acquire()/aio_context_release() has been replaced by
|
||||
fine-grained locking to protect state shared by multiple threads. The
|
||||
AioContext lock still plays the role of balancing locking in
|
||||
AIO_WAIT_WHILE() and many functions in QEMU either require that the
|
||||
AioContext lock is held or not held for this reason. In other words, the
|
||||
AioContext lock is purely there for consistency with itself and serves
|
||||
no real purpose anymore.
|
||||
|
||||
Stop actually acquiring/releasing the lock in
|
||||
aio_context_acquire()/aio_context_release() so that subsequent patches
|
||||
can remove callers across the codebase incrementally.
|
||||
|
||||
I have performed "make check" and qemu-iotests stress tests across
|
||||
x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
|
||||
result of eliminating the lock.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
util/async.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 8f90ddc304..04ee83d220 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx)
|
||||
|
||||
void aio_context_acquire(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_lock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
void aio_context_release(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_unlock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,102 @@
|
||||
From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:07 -0500
|
||||
Subject: [PATCH 090/101] aio: remove
|
||||
aio_context_acquire()/aio_context_release() API
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Delete these functions because nothing calls these functions anymore.
|
||||
|
||||
I introduced these APIs in commit 98563fc3ec44 ("aio: add
|
||||
aio_context_acquire() and aio_context_release()") in 2014. It's with a
|
||||
sigh of relief that I delete these APIs almost 10 years later.
|
||||
|
||||
Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an
|
||||
understanding of where the code needed to go in order to remove the
|
||||
limitations of the original dataplane and the IOThread/AioContext
|
||||
approach that followed it.
|
||||
|
||||
Emanuele Giuseppe Esposito had the splendid determination to convert
|
||||
large parts of the codebase so that they no longer needed the AioContext
|
||||
lock. This was a painstaking process, both in the actual code changes
|
||||
required and the iterations of code review that Emanuele eked out of
|
||||
Kevin and me over many months.
|
||||
|
||||
Kevin Wolf tackled multitudes of graph locking conversions to protect
|
||||
in-flight I/O from run-time changes to the block graph as well as the
|
||||
clang Thread Safety Analysis annotations that allow the compiler to
|
||||
check whether the graph lock is being used correctly.
|
||||
|
||||
And me, well, I'm just here to add some pizzazz to the QEMU multi-queue
|
||||
block layer :). Thank you to everyone who helped with this effort,
|
||||
including Eric Blake, code reviewer extraordinaire, and others who I've
|
||||
forgotten to mention.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-11-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio.h | 17 -----------------
|
||||
util/async.c | 10 ----------
|
||||
2 files changed, 27 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index f08b358077..af05512a7d 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx);
|
||||
*/
|
||||
void aio_context_unref(AioContext *ctx);
|
||||
|
||||
-/* Take ownership of the AioContext. If the AioContext will be shared between
|
||||
- * threads, and a thread does not want to be interrupted, it will have to
|
||||
- * take ownership around calls to aio_poll(). Otherwise, aio_poll()
|
||||
- * automatically takes care of calling aio_context_acquire and
|
||||
- * aio_context_release.
|
||||
- *
|
||||
- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
|
||||
- * thread still has to call those to avoid being interrupted by the guest.
|
||||
- *
|
||||
- * Bottom halves, timers and callbacks can be created or removed without
|
||||
- * acquiring the AioContext.
|
||||
- */
|
||||
-void aio_context_acquire(AioContext *ctx);
|
||||
-
|
||||
-/* Relinquish ownership of the AioContext. */
|
||||
-void aio_context_release(AioContext *ctx);
|
||||
-
|
||||
/**
|
||||
* aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
|
||||
* run only once and as soon as possible.
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index dfd44ef612..460529057c 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx)
|
||||
g_source_unref(&ctx->source);
|
||||
}
|
||||
|
||||
-void aio_context_acquire(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
-void aio_context_release(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
|
||||
AioContext *qemu_get_current_aio_context(void)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,81 @@
|
||||
From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:06 -0500
|
||||
Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE()
|
||||
and AIO_WAIT_WHILE_UNLOCKED()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and
|
||||
AIO_WAIT_WHILE_UNLOCKED() are equivalent.
|
||||
|
||||
A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED().
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-10-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 16 ++++------------
|
||||
1 file changed, 4 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index 5449b6d742..157f105916 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -63,9 +63,6 @@ extern AioWait global_aio_wait;
|
||||
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
|
||||
* caller does not hold a lock) are involved in the polling condition.
|
||||
* @cond: wait while this conditional expression is true
|
||||
- * @unlock: whether to unlock and then lock again @ctx. This applies
|
||||
- * only when waiting for another AioContext from the main loop.
|
||||
- * Otherwise it's ignored.
|
||||
*
|
||||
* Wait while a condition is true. Use this to implement synchronous
|
||||
* operations that require event loop activity.
|
||||
@@ -78,7 +75,7 @@ extern AioWait global_aio_wait;
|
||||
* wait on conditions between two IOThreads since that could lead to deadlock,
|
||||
* go via the main loop instead.
|
||||
*/
|
||||
-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \
|
||||
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \
|
||||
bool waited_ = false; \
|
||||
AioWait *wait_ = &global_aio_wait; \
|
||||
AioContext *ctx_ = (ctx); \
|
||||
@@ -95,13 +92,7 @@ extern AioWait global_aio_wait;
|
||||
assert(qemu_get_current_aio_context() == \
|
||||
qemu_get_aio_context()); \
|
||||
while ((cond)) { \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_release(ctx_); \
|
||||
- } \
|
||||
aio_poll(qemu_get_aio_context(), true); \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_acquire(ctx_); \
|
||||
- } \
|
||||
waited_ = true; \
|
||||
} \
|
||||
} \
|
||||
@@ -109,10 +100,11 @@ extern AioWait global_aio_wait;
|
||||
waited_; })
|
||||
|
||||
#define AIO_WAIT_WHILE(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */
|
||||
#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
/**
|
||||
* aio_wait_kick:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 50795ee051a342c681a9b45671c552fbd6274db8
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:13 2023 -0400
|
||||
|
||||
apic: disable reentrancy detection for apic-msi
|
||||
|
||||
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
|
||||
as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/apic.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
|
||||
index 20b5a94073..ac3d47d231 100644
|
||||
--- a/hw/intc/apic.c
|
||||
+++ b/hw/intc/apic.c
|
||||
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
|
||||
APIC_SPACE_SIZE);
|
||||
|
||||
+ /*
|
||||
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
|
||||
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
|
||||
+ * safe.
|
||||
+ */
|
||||
+ s->io_memory.disable_reentrancy_guard = true;
|
||||
+
|
||||
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
|
||||
local_apics[s->id] = s;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,231 +0,0 @@
|
||||
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:07 2023 -0400
|
||||
|
||||
async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
|
||||
when creating new BHes. Then, the async API will toggle the guard
|
||||
before/after calling the BH call-back. This prevents bh->mmio reentrancy
|
||||
issues.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
|
||||
[thuth: Fix "line over 90 characters" checkpatch.pl error]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 7 +++++++
|
||||
include/block/aio.h | 18 ++++++++++++++++--
|
||||
include/qemu/main-loop.h | 7 +++++--
|
||||
tests/unit/ptimer-test-stubs.c | 3 ++-
|
||||
util/async.c | 18 +++++++++++++++++-
|
||||
util/main-loop.c | 6 ++++--
|
||||
util/trace-events | 1 +
|
||||
7 files changed, 52 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index 343120f2ef..a3e949f6b3 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
|
||||
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
|
||||
* LEGACY timer_new_ms() - create a timer
|
||||
* LEGACY qemu_bh_new() - create a BH
|
||||
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* LEGACY qemu_aio_wait() - run an event loop iteration
|
||||
|
||||
Since they implicitly work on the main loop they cannot be used in code that
|
||||
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
|
||||
* aio_set_event_notifier() - monitor an event notifier
|
||||
* aio_timer_new() - create a timer
|
||||
* aio_bh_new() - create a BH
|
||||
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* aio_poll() - run an event loop iteration
|
||||
|
||||
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
|
||||
+argument, which is used to check for and prevent re-entrancy problems. For
|
||||
+BHs associated with devices, the reentrancy-guard is contained in the
|
||||
+corresponding DeviceState and named "mem_reentrancy_guard".
|
||||
+
|
||||
The AioContext can be obtained from the IOThread using
|
||||
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
|
||||
Code that takes an AioContext argument works both in IOThreads or the main
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 543717f294..db6f23c619 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -23,6 +23,8 @@
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "block/graph-lock.h"
|
||||
+#include "hw/qdev-core.h"
|
||||
+
|
||||
|
||||
typedef struct BlockAIOCB BlockAIOCB;
|
||||
typedef void BlockCompletionFunc(void *opaque, int ret);
|
||||
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*
|
||||
* @name: A human-readable identifier for debugging purposes.
|
||||
+ * @reentrancy_guard: A guard set when entering a cb to prevent
|
||||
+ * device-reentrancy issues
|
||||
*/
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name);
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure
|
||||
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* string.
|
||||
*/
|
||||
#define aio_bh_new(ctx, cb, opaque) \
|
||||
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
|
||||
+
|
||||
+/**
|
||||
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
|
||||
+ * reentrancy_guard
|
||||
+ *
|
||||
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
|
||||
+ * string.
|
||||
+ */
|
||||
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
|
||||
index b3e54e00bc..68e70e61aa 100644
|
||||
--- a/include/qemu/main-loop.h
|
||||
+++ b/include/qemu/main-loop.h
|
||||
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
|
||||
|
||||
/* internal interfaces */
|
||||
|
||||
+#define qemu_bh_new_guarded(cb, opaque, guard) \
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
|
||||
#define qemu_bh_new(cb, opaque) \
|
||||
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
|
||||
enum {
|
||||
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
|
||||
index f2bfcede93..8c9407c560 100644
|
||||
--- a/tests/unit/ptimer-test-stubs.c
|
||||
+++ b/tests/unit/ptimer-test-stubs.c
|
||||
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
|
||||
return deadline;
|
||||
}
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh = g_new(QEMUBH, 1);
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 21016a1ac7..a9b528c370 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -65,6 +65,7 @@ struct QEMUBH {
|
||||
void *opaque;
|
||||
QSLIST_ENTRY(QEMUBH) next;
|
||||
unsigned flags;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
};
|
||||
|
||||
/* Called concurrently from any thread */
|
||||
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
|
||||
}
|
||||
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name)
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_new(QEMUBH, 1);
|
||||
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
.name = name,
|
||||
+ .reentrancy_guard = reentrancy_guard,
|
||||
};
|
||||
return bh;
|
||||
}
|
||||
|
||||
void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
+ bool last_engaged_in_io = false;
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
+ if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ trace_reentrant_aio(bh->ctx, bh->name);
|
||||
+ }
|
||||
+ bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
bh->cb(bh->opaque);
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
|
||||
diff --git a/util/main-loop.c b/util/main-loop.c
|
||||
index e180c85145..7022f02ef8 100644
|
||||
--- a/util/main-loop.c
|
||||
+++ b/util/main-loop.c
|
||||
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
|
||||
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
|
||||
+ reentrancy_guard);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/util/trace-events b/util/trace-events
|
||||
index 16f78d8fe5..3f7e766683 100644
|
||||
--- a/util/trace-events
|
||||
+++ b/util/trace-events
|
||||
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
|
||||
# async.c
|
||||
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
|
||||
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
|
||||
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Mon May 1 10:19:56 2023 -0400
|
||||
|
||||
async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
|
||||
Fix that by keeping a local copy of the re-entrancy guard pointer.
|
||||
|
||||
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
|
||||
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
util/async.c | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index a9b528c370..cd1a1815f9 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
bool last_engaged_in_io = false;
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
- if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ /* Make a copy of the guard-pointer as cb may free the bh */
|
||||
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
|
||||
+ if (reentrancy_guard) {
|
||||
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
|
||||
+ if (reentrancy_guard->engaged_in_io) {
|
||||
trace_reentrant_aio(bh->ctx, bh->name);
|
||||
}
|
||||
- bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ reentrancy_guard->engaged_in_io = true;
|
||||
}
|
||||
|
||||
bh->cb(bh->opaque);
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ if (reentrancy_guard) {
|
||||
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,476 @@
|
||||
From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:00 +0800
|
||||
Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm)
|
||||
|
||||
Introduce an iommufd object which allows the interaction
|
||||
with the host /dev/iommu device.
|
||||
|
||||
The /dev/iommu device may already have been opened outside of qemu,
|
||||
in which case the fd can be passed directly along with the
|
||||
iommufd object.
|
||||
|
||||
This allows the iommufd object to be shared across several
|
||||
subsystems (VFIO, VDPA, ...). For example, libvirt would open
|
||||
the /dev/iommu once.
|
||||
|
||||
If no fd is passed along with the iommufd object, the /dev/iommu
|
||||
is opened by the qemu code.
|
||||
|
||||
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 8 ++
|
||||
backends/Kconfig | 4 +
|
||||
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
|
||||
backends/meson.build | 1 +
|
||||
backends/trace-events | 10 ++
|
||||
include/sysemu/iommufd.h | 38 ++++++
|
||||
qapi/qom.json | 19 +++
|
||||
qemu-options.hx | 12 ++
|
||||
8 files changed, 337 insertions(+)
|
||||
create mode 100644 backends/iommufd.c
|
||||
create mode 100644 include/sysemu/iommufd.h
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index 695e0bd34f..a5a446914a 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
|
||||
F: docs/system/s390x/vfio-ap.rst
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
+iommufd
|
||||
+M: Yi Liu <yi.l.liu@intel.com>
|
||||
+M: Eric Auger <eric.auger@redhat.com>
|
||||
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
+S: Supported
|
||||
+F: backends/iommufd.c
|
||||
+F: include/sysemu/iommufd.h
|
||||
+
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
S: Supported
|
||||
diff --git a/backends/Kconfig b/backends/Kconfig
|
||||
index f35abc1609..2cb23f62fa 100644
|
||||
--- a/backends/Kconfig
|
||||
+++ b/backends/Kconfig
|
||||
@@ -1 +1,5 @@
|
||||
source tpm/Kconfig
|
||||
+
|
||||
+config IOMMUFD
|
||||
+ bool
|
||||
+ depends on VFIO
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
new file mode 100644
|
||||
index 0000000000..ba58a0eb0d
|
||||
--- /dev/null
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -0,0 +1,245 @@
|
||||
+/*
|
||||
+ * iommufd container backend
|
||||
+ *
|
||||
+ * Copyright (C) 2023 Intel Corporation.
|
||||
+ * Copyright Red Hat, Inc. 2023
|
||||
+ *
|
||||
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
+ * Eric Auger <eric.auger@redhat.com>
|
||||
+ *
|
||||
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/iommufd.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "qapi/qmp/qerror.h"
|
||||
+#include "qemu/module.h"
|
||||
+#include "qom/object_interfaces.h"
|
||||
+#include "qemu/error-report.h"
|
||||
+#include "monitor/monitor.h"
|
||||
+#include "trace.h"
|
||||
+#include <sys/ioctl.h>
|
||||
+#include <linux/iommufd.h>
|
||||
+
|
||||
+static void iommufd_backend_init(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ be->fd = -1;
|
||||
+ be->users = 0;
|
||||
+ be->owned = true;
|
||||
+ qemu_mutex_init(&be->lock);
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_finalize(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ if (be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+ int fd = -1;
|
||||
+
|
||||
+ fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||
+ if (fd == -1) {
|
||||
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
+ return;
|
||||
+ }
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ be->fd = fd;
|
||||
+ be->owned = false;
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ trace_iommu_backend_set_fd(be->fd);
|
||||
+}
|
||||
+
|
||||
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
|
||||
+
|
||||
+ return !be->users;
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
|
||||
+{
|
||||
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||
+
|
||||
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
|
||||
+
|
||||
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
+{
|
||||
+ int fd, ret = 0;
|
||||
+
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (be->users == UINT32_MAX) {
|
||||
+ error_setg(errp, "too many connections");
|
||||
+ ret = -E2BIG;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ if (be->owned && !be->users) {
|
||||
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
|
||||
+ ret = fd;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->fd = fd;
|
||||
+ }
|
||||
+ be->users++;
|
||||
+out:
|
||||
+ trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
+ be->users, ret);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
+{
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (!be->users) {
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->users--;
|
||||
+ if (!be->users && be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+out:
|
||||
+ trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_alloc alloc_data = {
|
||||
+ .size = sizeof(alloc_data),
|
||||
+ .flags = 0,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
|
||||
+ if (ret) {
|
||||
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ *ioas_id = alloc_data.out_ioas_id;
|
||||
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_destroy des = {
|
||||
+ .size = sizeof(des),
|
||||
+ .id = id,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
|
||||
+ trace_iommufd_backend_free_id(fd, id, ret);
|
||||
+ if (ret) {
|
||||
+ error_report("Failed to free id: %u %m", id);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_map map = {
|
||||
+ .size = sizeof(map),
|
||||
+ .flags = IOMMU_IOAS_MAP_READABLE |
|
||||
+ IOMMU_IOAS_MAP_FIXED_IOVA,
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .__reserved = 0,
|
||||
+ .user_va = (uintptr_t)vaddr,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ if (!readonly) {
|
||||
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
|
||||
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
|
||||
+ vaddr, readonly, ret);
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+
|
||||
+ /* TODO: Mapping hardware PCI BAR regions is not supported for now. */
|
||||
+ if (errno == EFAULT) {
|
||||
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
|
||||
+ } else {
|
||||
+ error_report("IOMMU_IOAS_MAP failed: %m");
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_unmap unmap = {
|
||||
+ .size = sizeof(unmap),
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
|
||||
+ /*
|
||||
+ * IOMMUFD treats a mapping as a kind of object, so unmapping a
|
||||
+ * nonexistent mapping is treated as deleting a nonexistent
|
||||
+ * object and returns ENOENT. This differs from the legacy
|
||||
+ * backend, which allows it. A vIOMMU may trigger a lot of
|
||||
+ * redundant unmappings; to avoid flooding the log, treat them
|
||||
+ * as success for IOMMUFD, just like the legacy backend.
|
||||
+ */
|
||||
+ if (ret && errno == ENOENT) {
|
||||
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
|
||||
+ ret = 0;
|
||||
+ } else {
|
||||
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
|
||||
+ }
|
||||
+
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static const TypeInfo iommufd_backend_info = {
|
||||
+ .name = TYPE_IOMMUFD_BACKEND,
|
||||
+ .parent = TYPE_OBJECT,
|
||||
+ .instance_size = sizeof(IOMMUFDBackend),
|
||||
+ .instance_init = iommufd_backend_init,
|
||||
+ .instance_finalize = iommufd_backend_finalize,
|
||||
+ .class_size = sizeof(IOMMUFDBackendClass),
|
||||
+ .class_init = iommufd_backend_class_init,
|
||||
+ .interfaces = (InterfaceInfo[]) {
|
||||
+ { TYPE_USER_CREATABLE },
|
||||
+ { }
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static void register_types(void)
|
||||
+{
|
||||
+ type_register_static(&iommufd_backend_info);
|
||||
+}
|
||||
+
|
||||
+type_init(register_types);
|
||||
diff --git a/backends/meson.build b/backends/meson.build
|
||||
index 914c7c4afb..9a5cea480d 100644
|
||||
--- a/backends/meson.build
|
||||
+++ b/backends/meson.build
|
||||
@@ -20,6 +20,7 @@ if have_vhost_user
|
||||
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
|
||||
endif
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
|
||||
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
|
||||
if have_vhost_user_crypto
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
|
||||
endif
|
||||
diff --git a/backends/trace-events b/backends/trace-events
|
||||
index 652eb76a57..d45c6e31a6 100644
|
||||
--- a/backends/trace-events
|
||||
+++ b/backends/trace-events
|
||||
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
|
||||
dbus_vmstate_post_load(int version_id) "version_id: %d"
|
||||
dbus_vmstate_loading(const char *id) "id: %s"
|
||||
dbus_vmstate_saving(const char *id) "id: %s"
|
||||
+
|
||||
+# iommufd.c
|
||||
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
|
||||
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
|
||||
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
|
||||
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
|
||||
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
|
||||
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
new file mode 100644
|
||||
index 0000000000..9c5524b0ed
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -0,0 +1,38 @@
|
||||
+#ifndef SYSEMU_IOMMUFD_H
|
||||
+#define SYSEMU_IOMMUFD_H
|
||||
+
|
||||
+#include "qom/object.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "exec/hwaddr.h"
|
||||
+#include "exec/cpu-common.h"
|
||||
+
|
||||
+#define TYPE_IOMMUFD_BACKEND "iommufd"
|
||||
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
|
||||
+
|
||||
+struct IOMMUFDBackendClass {
|
||||
+ ObjectClass parent_class;
|
||||
+};
|
||||
+
|
||||
+struct IOMMUFDBackend {
|
||||
+ Object parent;
|
||||
+
|
||||
+ /*< protected >*/
|
||||
+ int fd; /* /dev/iommu file descriptor */
|
||||
+ bool owned; /* is the /dev/iommu opened internally */
|
||||
+ QemuMutex lock;
|
||||
+ uint32_t users;
|
||||
+
|
||||
+ /*< public >*/
|
||||
+};
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp);
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly);
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size);
|
||||
+#endif
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index c53ef978ff..95516ba325 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -794,6 +794,23 @@
|
||||
{ 'struct': 'VfioUserServerProperties',
|
||||
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
|
||||
|
||||
+##
|
||||
+# @IOMMUFDProperties:
|
||||
+#
|
||||
+# Properties for iommufd objects.
|
||||
+#
|
||||
+# @fd: file descriptor name previously passed via 'getfd' command,
|
||||
+# which represents a pre-opened /dev/iommu. This allows the
|
||||
+# iommufd object to be shared across several subsystems
|
||||
+# (VFIO, VDPA, ...), and the file descriptor to be shared
|
||||
+# with other processes, e.g. DPDK. (default: QEMU opens
|
||||
+# /dev/iommu by itself)
|
||||
+#
|
||||
+# Since: 9.0
|
||||
+##
|
||||
+{ 'struct': 'IOMMUFDProperties',
|
||||
+ 'data': { '*fd': 'str' } }
|
||||
+
|
||||
##
|
||||
# @RngProperties:
|
||||
#
|
||||
@@ -934,6 +951,7 @@
|
||||
'input-barrier',
|
||||
{ 'name': 'input-linux',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd',
|
||||
'iothread',
|
||||
'main-loop',
|
||||
{ 'name': 'memory-backend-epc',
|
||||
@@ -1003,6 +1021,7 @@
|
||||
'input-barrier': 'InputBarrierProperties',
|
||||
'input-linux': { 'type': 'InputLinuxProperties',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd': 'IOMMUFDProperties',
|
||||
'iothread': 'IothreadProperties',
|
||||
'main-loop': 'MainLoopProperties',
|
||||
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 557118cb1f..0814f43066 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -5224,6 +5224,18 @@ SRST
|
||||
|
||||
The ``share`` boolean option is on by default with memfd.
|
||||
|
||||
+ ``-object iommufd,id=id[,fd=fd]``
|
||||
+ Creates an iommufd backend which allows control of DMA mapping
|
||||
+ through the ``/dev/iommu`` device.
|
||||
+
|
||||
+ The ``id`` parameter is a unique ID which frontends (such as
|
||||
+ vfio-pci or vdpa) will use to connect with the iommufd backend.
|
||||
+
|
||||
+ The ``fd`` parameter is an optional pre-opened file descriptor
|
||||
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
|
||||
+ across all subsystems, bringing the benefit of centralized
|
||||
+ reference counting.
|
||||
+
|
||||
``-object rng-builtin,id=id``
|
||||
Creates a random number generator backend which obtains entropy
|
||||
from QEMU builtin functions. The ``id`` parameter is a unique ID
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,47 @@
|
||||
From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Fri, 22 Dec 2023 08:55:23 +0100
|
||||
Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend
|
||||
users
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm)
|
||||
|
||||
QOM already has a ref count on objects and it will assert much
|
||||
earlier, when INT_MAX is reached.
|
||||
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 5 -----
|
||||
1 file changed, 5 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index ba58a0eb0d..393c0d9a37 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
int fd, ret = 0;
|
||||
|
||||
qemu_mutex_lock(&be->lock);
|
||||
- if (be->users == UINT32_MAX) {
|
||||
- error_setg(errp, "too many connections");
|
||||
- ret = -E2BIG;
|
||||
- goto out;
|
||||
- }
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,112 @@
|
||||
From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Thu, 21 Dec 2023 16:58:41 +0100
|
||||
Subject: [PATCH 065/101] backends/iommufd: Remove mutex
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Coverity reports a concurrent data access violation because be->users
|
||||
is being accessed in iommufd_backend_can_be_deleted() without holding
|
||||
the mutex.
|
||||
|
||||
However, these routines are called from the QEMU main thread when a
|
||||
device is created. In this case, the code paths should be protected by
|
||||
the BQL lock and it should be safe to drop the IOMMUFD backend mutex.
|
||||
Simply remove it.
|
||||
|
||||
Fixes: CID 1531550
|
||||
Fixes: CID 1531549
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 7 -------
|
||||
include/sysemu/iommufd.h | 2 --
|
||||
2 files changed, 9 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index 393c0d9a37..1ef683c7b0 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj)
|
||||
be->fd = -1;
|
||||
be->users = 0;
|
||||
be->owned = true;
|
||||
- qemu_mutex_init(&be->lock);
|
||||
}
|
||||
|
||||
static void iommufd_backend_finalize(Object *obj)
|
||||
@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
return;
|
||||
}
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
be->fd = fd;
|
||||
be->owned = false;
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
trace_iommu_backend_set_fd(be->fd);
|
||||
}
|
||||
|
||||
@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
{
|
||||
int fd, ret = 0;
|
||||
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
out:
|
||||
trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
be->users, ret);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
{
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (!be->users) {
|
||||
goto out;
|
||||
}
|
||||
@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
}
|
||||
out:
|
||||
trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
}
|
||||
|
||||
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
index 9c5524b0ed..9af27ebd6c 100644
|
||||
--- a/include/sysemu/iommufd.h
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -2,7 +2,6 @@
|
||||
#define SYSEMU_IOMMUFD_H
|
||||
|
||||
#include "qom/object.h"
|
||||
-#include "qemu/thread.h"
|
||||
#include "exec/hwaddr.h"
|
||||
#include "exec/cpu-common.h"
|
||||
|
||||
@@ -19,7 +18,6 @@ struct IOMMUFDBackend {
|
||||
/*< protected >*/
|
||||
int fd; /* /dev/iommu file descriptor */
|
||||
bool owned; /* is the /dev/iommu opened internally */
|
||||
- QemuMutex lock;
|
||||
uint32_t users;
|
||||
|
||||
/*< public >*/
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,57 +0,0 @@
|
||||
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
|
||||
iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:11 2023 -0400
|
||||
|
||||
bcm2835_property: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from bcm2835_property to
|
||||
bcm2835_mbox and back into bcm2835_property, mark iomem as
|
||||
reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/misc/bcm2835_property.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
|
||||
index 890ae7bae5..de056ea2df 100644
|
||||
--- a/hw/misc/bcm2835_property.c
|
||||
+++ b/hw/misc/bcm2835_property.c
|
||||
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
|
||||
|
||||
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
|
||||
TYPE_BCM2835_PROPERTY, 0x10);
|
||||
+
|
||||
+ /*
|
||||
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
|
||||
+ * iomem. As such, mark iomem as re-entrancy safe.
|
||||
+ */
|
||||
+ s->iomem.disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
|
||||
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,354 +0,0 @@
|
||||
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:16 +0200
|
||||
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When processing vectored guest requests that are not aligned to the
|
||||
storage request alignment, we pad them by adding head and/or tail
|
||||
buffers for a read-modify-write cycle.
|
||||
|
||||
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
|
||||
with this padding, the vector can exceed that limit. As of
|
||||
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
|
||||
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
|
||||
limit, instead returning an error to the guest.
|
||||
|
||||
To the guest, this appears as a random I/O error. We should not return
|
||||
an I/O error to the guest when it issued a perfectly valid request.
|
||||
|
||||
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
|
||||
longer than IOV_MAX, which generally seems to work (because the guest
|
||||
assumes a smaller alignment than we really have, file-posix's
|
||||
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
|
||||
so emulate the request, so that the IOV_MAX does not matter). However,
|
||||
that does not seem exactly great.
|
||||
|
||||
I see two ways to fix this problem:
|
||||
1. We split such long requests into two requests.
|
||||
2. We join some elements of the vector into new buffers to make it
|
||||
shorter.
|
||||
|
||||
I am wary of (1), because it seems like it may have unintended side
|
||||
effects.
|
||||
|
||||
(2) on the other hand seems relatively simple to implement, with
|
||||
hopefully few side effects, so this patch does that.
|
||||
|
||||
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
|
||||
is effectively replaced by the new function bdrv_create_padded_qiov(),
|
||||
which not only wraps the request IOV with padding head/tail, but also
|
||||
ensures that the resulting vector will not have more than IOV_MAX
|
||||
elements. Putting that functionality into qemu_iovec_init_extended() is
|
||||
infeasible because it requires allocating a bounce buffer; doing so
|
||||
would require many more parameters (buffer alignment, how to initialize
|
||||
the buffer, and out parameters like the buffer, its length, and the
|
||||
original elements), which is not reasonable.
|
||||
|
||||
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
|
||||
functionality into bdrv_create_padded_qiov() by using public
|
||||
qemu_iovec_* functions, so that is what this patch does.
|
||||
|
||||
Because bdrv_pad_request() was the only "serious" user of
|
||||
qemu_iovec_init_extended(), the next patch will remove the latter
|
||||
function, so the functionality is not implemented twice.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 151 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e267a85ab..4e8e90208b 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1439,6 +1439,14 @@ out:
|
||||
* @merge_reads is true for small requests,
|
||||
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
* head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ *
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
|
||||
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
|
||||
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
|
||||
+ * I/O vector elements so for read requests, the data can be copied back after
|
||||
+ * the read is done.
|
||||
*/
|
||||
typedef struct BdrvRequestPadding {
|
||||
uint8_t *buf;
|
||||
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
|
||||
size_t head;
|
||||
size_t tail;
|
||||
bool merge_reads;
|
||||
+ bool write;
|
||||
QEMUIOVector local_qiov;
|
||||
+
|
||||
+ uint8_t *collapse_bounce_buf;
|
||||
+ size_t collapse_len;
|
||||
+ QEMUIOVector pre_collapse_qiov;
|
||||
} BdrvRequestPadding;
|
||||
|
||||
static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad)
|
||||
{
|
||||
int64_t align = bs->bl.request_alignment;
|
||||
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
}
|
||||
|
||||
+ pad->write = write;
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1547,8 +1563,23 @@ zero_mem:
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+/**
|
||||
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
|
||||
+ */
|
||||
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
|
||||
{
|
||||
+ if (pad->collapse_bounce_buf) {
|
||||
+ if (!pad->write) {
|
||||
+ /*
|
||||
+ * If padding required elements in the vector to be collapsed into a
|
||||
+ * bounce buffer, copy the bounce buffer content back
|
||||
+ */
|
||||
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_vfree(pad->collapse_bounce_buf);
|
||||
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
|
||||
+ }
|
||||
if (pad->buf) {
|
||||
qemu_vfree(pad->buf);
|
||||
qemu_iovec_destroy(&pad->local_qiov);
|
||||
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
memset(pad, 0, sizeof(*pad));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
|
||||
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
|
||||
+ *
|
||||
+ * To ensure this, when necessary, the first two or three elements of @iov are
|
||||
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
|
||||
+ * bounce buffer in pad->local_qiov.
|
||||
+ *
|
||||
+ * After performing a read request, the data from the bounce buffer must be
|
||||
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
|
||||
+ */
|
||||
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ struct iovec *iov, int niov,
|
||||
+ size_t iov_offset, size_t bytes)
|
||||
+{
|
||||
+ int padded_niov, surplus_count, collapse_count;
|
||||
+
|
||||
+ /* Assert this invariant */
|
||||
+ assert(niov <= IOV_MAX);
|
||||
+
|
||||
+ /*
|
||||
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
|
||||
+ * to the guest is not ideal, but there is little else we can do. At least
|
||||
+ * this will practically never happen on 64-bit systems.
|
||||
+ */
|
||||
+ if (SIZE_MAX - pad->head < bytes ||
|
||||
+ SIZE_MAX - pad->head - bytes < pad->tail)
|
||||
+ {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Length of the resulting IOV if we just concatenated everything */
|
||||
+ padded_niov = !!pad->head + niov + !!pad->tail;
|
||||
+
|
||||
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
|
||||
+ * Instead, merge the first two or three elements of @iov to reduce the
|
||||
+ * number of vector elements as necessary.
|
||||
+ */
|
||||
+ if (padded_niov > IOV_MAX) {
|
||||
+ /*
|
||||
+ * Only head and tail can have led to the number of entries exceeding
|
||||
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
|
||||
+ * to reduce the number of elements by `surplus_count`, so we merge that
|
||||
+ * many elements plus one into one element.
|
||||
+ */
|
||||
+ surplus_count = padded_niov - IOV_MAX;
|
||||
+ assert(surplus_count <= !!pad->head + !!pad->tail);
|
||||
+ collapse_count = surplus_count + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
|
||||
+ * advance `iov` (and associated variables) by those elements.
|
||||
+ */
|
||||
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
|
||||
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
|
||||
+ collapse_count, iov_offset, SIZE_MAX);
|
||||
+ iov += collapse_count;
|
||||
+ iov_offset = 0;
|
||||
+ niov -= collapse_count;
|
||||
+ bytes -= pad->pre_collapse_qiov.size;
|
||||
+
|
||||
+ /*
|
||||
+ * Construct the bounce buffer to match the length of the to-collapse
|
||||
+ * vector elements, and for write requests, initialize it with the data
|
||||
+ * from those elements. Then add it to `pad->local_qiov`.
|
||||
+ */
|
||||
+ pad->collapse_len = pad->pre_collapse_qiov.size;
|
||||
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
|
||||
+ if (pad->write) {
|
||||
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bdrv_pad_request
|
||||
*
|
||||
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
* read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
* needed.
|
||||
*
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
|
||||
* - on function start they represent original request
|
||||
* - on failure or when padding is not needed they are unchanged
|
||||
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad, bool *padded,
|
||||
BdrvRequestFlags *flags)
|
||||
{
|
||||
int ret;
|
||||
+ struct iovec *sliced_iov;
|
||||
+ int sliced_niov;
|
||||
+ size_t sliced_head, sliced_tail;
|
||||
|
||||
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
|
||||
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
*padded = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
- *qiov, *qiov_offset, *bytes,
|
||||
- pad->buf + pad->buf_len - pad->tail,
|
||||
- pad->tail);
|
||||
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
|
||||
+ &sliced_head, &sliced_tail,
|
||||
+ &sliced_niov);
|
||||
+
|
||||
+ /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ assert(*bytes <= SIZE_MAX);
|
||||
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
+ sliced_head, *bytes);
|
||||
if (ret < 0) {
|
||||
- bdrv_padding_destroy(pad);
|
||||
+ bdrv_padding_finalize(pad);
|
||||
return ret;
|
||||
}
|
||||
*bytes += pad->head + pad->tail;
|
||||
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- NULL, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
|
||||
+ &pad, NULL, &flags);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
bs->bl.request_alignment,
|
||||
qiov, qiov_offset, flags);
|
||||
tracked_request_end(&req);
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
fail:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
/* This flag doesn't make sense for padding or zero writes */
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
|
||||
if (padding) {
|
||||
assert(!(flags & BDRV_REQ_NO_WAIT));
|
||||
bdrv_make_request_serialising(req, align);
|
||||
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
}
|
||||
|
||||
out:
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- &padded, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
|
||||
+ &pad, &padded, &flags);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
qiov, qiov_offset, flags);
|
||||
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:34 +0200
|
||||
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
|
||||
qmp_block_resize()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This QMP handler runs in a coroutine, so it must use the corresponding
|
||||
no_co_wrappers instead.
|
||||
|
||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
blockdev.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index d7b5c18f0a..eb509cf964 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
if (!blk) {
|
||||
return;
|
||||
}
|
||||
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
|
||||
bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
bdrv_co_unlock(bs);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,73 +0,0 @@
|
||||
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 14 Jul 2023 10:59:38 +0200
|
||||
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
|
||||
which bdrv_check_qiov_request() does not guarantee.
|
||||
|
||||
bdrv_check_request32() however will guarantee this, and both of
|
||||
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
|
||||
bdrv_co_pwritev_part()) already run it before calling
|
||||
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
|
||||
bdrv_check_request32() without expecting error, too.
|
||||
|
||||
In effect, this patch will not change guest-visible behavior. It is a
|
||||
clean-up to tighten a condition to match what is guaranteed by our
|
||||
callers, and which exists purely to show clearly why the subsequent
|
||||
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
|
||||
|
||||
Note there is a difference between the interfaces of
|
||||
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
|
||||
an errp, the latter does not, so we can no longer just pass
|
||||
&error_abort. Instead, we need to check the returned value. While we
|
||||
do expect success (because the callers have already run this function),
|
||||
an assert(ret == 0) is not much simpler than just to return an error if
|
||||
it occurs, so let us handle errors by returning them up the stack now.
|
||||
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-id: 20230714085938.202730-1-hreitz@redhat.com
|
||||
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
|
||||
("block: Collapse padded I/O vecs exceeding IOV_MAX")
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/io.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 4e8e90208b..807c9fb720 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
int sliced_niov;
|
||||
size_t sliced_head, sliced_tail;
|
||||
|
||||
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
+ /* Should have been checked by the caller already */
|
||||
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
&sliced_head, &sliced_tail,
|
||||
&sliced_niov);
|
||||
|
||||
- /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ /* Guaranteed by bdrv_check_request32() */
|
||||
assert(*bytes <= SIZE_MAX);
|
||||
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
sliced_head, *bytes);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,104 @@
|
||||
From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 2 Feb 2024 15:47:54 +0100
|
||||
Subject: [PATCH 03/22] block-backend: Allow concurrent context changes
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 222: Allow concurrent BlockBackend context changes
|
||||
RH-Jira: RHEL-24593
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Since AioContext locks have been removed, a BlockBackend's AioContext
|
||||
may really change at any time (only exception is that it is often
|
||||
confined to a drained section, as noted in this patch). Therefore,
|
||||
blk_get_aio_context() cannot rely on its root node's context always
|
||||
matching that of the BlockBackend.
|
||||
|
||||
In practice, whether they match does not matter anymore anyway: Requests
|
||||
can be sent to BDSs from any context, so anyone who requests the BB's
|
||||
context should have no reason to require the root node to have the same
|
||||
context. Therefore, we can and should remove the assertion to that
|
||||
effect.
|
||||
|
||||
In addition, because the context can be set and queried from different
|
||||
threads concurrently, it has to be accessed with atomic operations.
|
||||
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-19381
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-ID: <20240202144755.671354-2-hreitz@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111)
|
||||
---
|
||||
block/block-backend.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index 209eb07528..9c4de79e6b 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -44,7 +44,7 @@ struct BlockBackend {
|
||||
char *name;
|
||||
int refcnt;
|
||||
BdrvChild *root;
|
||||
- AioContext *ctx;
|
||||
+ AioContext *ctx; /* access with atomic operations only */
|
||||
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
|
||||
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
|
||||
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
|
||||
@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
|
||||
}
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Return BB's current AioContext. Note that this context may change
|
||||
+ * concurrently at any time, with one exception: If the BB has a root node
|
||||
+ * attached, its context will only change through bdrv_try_change_aio_context(),
|
||||
+ * which creates a drained section. Therefore, incrementing such a BB's
|
||||
+ * in-flight counter will prevent its context from changing.
|
||||
+ */
|
||||
AioContext *blk_get_aio_context(BlockBackend *blk)
|
||||
{
|
||||
- BlockDriverState *bs;
|
||||
IO_CODE();
|
||||
|
||||
if (!blk) {
|
||||
return qemu_get_aio_context();
|
||||
}
|
||||
|
||||
- bs = blk_bs(blk);
|
||||
- if (bs) {
|
||||
- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
|
||||
- assert(ctx == blk->ctx);
|
||||
- }
|
||||
-
|
||||
- return blk->ctx;
|
||||
+ return qatomic_read(&blk->ctx);
|
||||
}
|
||||
|
||||
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (!bs) {
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
|
||||
AioContext *new_context = s->new_ctx;
|
||||
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
|
||||
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
if (tgm->throttle_state) {
|
||||
throttle_group_detach_aio_context(tgm);
|
||||
throttle_group_attach_aio_context(tgm, new_context);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,386 +0,0 @@
|
||||
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:33 +0200
|
||||
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
|
||||
context
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
These functions must not be called in coroutine context, because they
|
||||
need write access to the graph.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/crypto.c | 6 +++---
|
||||
block/parallels.c | 6 +++---
|
||||
block/qcow.c | 6 +++---
|
||||
block/qcow2.c | 14 +++++++-------
|
||||
block/qed.c | 6 +++---
|
||||
block/vdi.c | 6 +++---
|
||||
block/vhdx.c | 6 +++---
|
||||
block/vmdk.c | 18 +++++++++---------
|
||||
block/vpc.c | 6 +++---
|
||||
include/block/block-global-state.h | 3 ++-
|
||||
include/sysemu/block-backend-global-state.h | 5 ++++-
|
||||
12 files changed, 44 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index d79a52ca74..a48112f945 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index ca67289187..8fd3ad0054 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
|
||||
ret = 0;
|
||||
cleanup:
|
||||
qcrypto_block_free(crypto);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
fail:
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -730,7 +730,7 @@ fail:
|
||||
bdrv_co_delete_file_noerr(bs);
|
||||
}
|
||||
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_QCryptoBlockCreateOptions(create_opts);
|
||||
qobject_unref(cryptoopts);
|
||||
return ret;
|
||||
diff --git a/block/parallels.c b/block/parallels.c
|
||||
index 013684801a..b49c35929e 100644
|
||||
--- a/block/parallels.c
|
||||
+++ b/block/parallels.c
|
||||
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
|
||||
exit:
|
||||
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index 490e4f819e..a0c701f578 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
|
||||
g_free(tmp);
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(qcow_blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(qcow_blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
qcrypto_block_free(crypto);
|
||||
return ret;
|
||||
}
|
||||
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
fail:
|
||||
g_free(backing_fmt);
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 22084730f9..0b8beb8b47 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/*
|
||||
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
||||
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3949,8 +3949,8 @@ finish:
|
||||
}
|
||||
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 0705a7b4e2..aff2a2076e 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
|
||||
ret = 0; /* success */
|
||||
out:
|
||||
g_free(l1_table);
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vdi.c b/block/vdi.c
|
||||
index f2434d6153..08331d2dd7 100644
|
||||
--- a/block/vdi.c
|
||||
+++ b/block/vdi.c
|
||||
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs_file);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
g_free(bmap);
|
||||
return ret;
|
||||
}
|
||||
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
- bdrv_unref(bs_file);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/vhdx.c b/block/vhdx.c
|
||||
index 81420722a1..00777da91a 100644
|
||||
--- a/block/vhdx.c
|
||||
+++ b/block/vhdx.c
|
||||
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
|
||||
|
||||
ret = 0;
|
||||
delete_and_exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
g_free(creator);
|
||||
return ret;
|
||||
}
|
||||
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index f5f49018fe..01ca13c82b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2306,7 +2306,7 @@ exit:
|
||||
if (pbb) {
|
||||
*pbb = blk;
|
||||
} else {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
|
||||
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
|
||||
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
|
||||
blk_bs(backing)->drv->format_name);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
if (ret) {
|
||||
error_setg(errp, "Failed to read parent CID");
|
||||
goto exit;
|
||||
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
|
||||
blk_bs(extent_blk)->filename);
|
||||
created_size += cur_size;
|
||||
extent_idx++;
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
}
|
||||
|
||||
/* Check whether we got excess extents */
|
||||
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
|
||||
opaque, NULL);
|
||||
if (extent_blk) {
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
error_setg(errp, "List of extents contains unused extents");
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
|
||||
ret = 0;
|
||||
exit:
|
||||
if (blk) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
}
|
||||
g_free(desc);
|
||||
g_free(parent_desc_line);
|
||||
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
|
||||
errp)) {
|
||||
goto exit;
|
||||
}
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
exit:
|
||||
g_free(ext_filename);
|
||||
return blk;
|
||||
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
|
||||
return NULL;
|
||||
}
|
||||
blk_set_allow_write_beyond_eof(blk, true);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
|
||||
if (size != -1) {
|
||||
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
|
||||
if (ret) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index b89b0ff8e2..07ddda5b99 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
|
||||
}
|
||||
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 399200a9a3..cd4ea554bf 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
|
||||
bool quiet, Error **errp);
|
||||
|
||||
void bdrv_ref(BlockDriverState *bs);
|
||||
-void bdrv_unref(BlockDriverState *bs);
|
||||
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
|
||||
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
|
||||
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
|
||||
index 2b6d27db7c..fa83f9389c 100644
|
||||
--- a/include/sysemu/block-backend-global-state.h
|
||||
+++ b/include/sysemu/block-backend-global-state.h
|
||||
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
|
||||
|
||||
int blk_get_refcnt(BlockBackend *blk);
|
||||
void blk_ref(BlockBackend *blk);
|
||||
-void blk_unref(BlockBackend *blk);
|
||||
+
|
||||
+void no_coroutine_fn blk_unref(BlockBackend *blk);
|
||||
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
|
||||
+
|
||||
void blk_remove_all_bs(void);
|
||||
BlockBackend *blk_by_name(const char *name);
|
||||
BlockBackend *blk_next(BlockBackend *blk);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,74 +0,0 @@
|
||||
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Wed, 26 Jul 2023 09:48:07 +0200
|
||||
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() in blkio_virtio_blk_common_open() is used to open the
|
||||
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
|
||||
the future eventually the unix socket.
|
||||
|
||||
In all these cases we cannot open the path in read-only mode,
|
||||
when the `read-only` option of blockdev is on, because the exchange
|
||||
of IOCTL commands, for example, will fail.
|
||||
|
||||
In order to open the device read-only, we have to use the `read-only`
|
||||
property of the libblkio driver as we already do in blkio_file_open().
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 21 ++++++++++++---------
|
||||
1 file changed, 12 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 3ea9841bd8..5a82c6cb1a 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
* layer through the "/dev/fdset/N" special path.
|
||||
*/
|
||||
if (fd_supported) {
|
||||
- int open_flags;
|
||||
-
|
||||
- if (flags & BDRV_O_RDWR) {
|
||||
- open_flags = O_RDWR;
|
||||
- } else {
|
||||
- open_flags = O_RDONLY;
|
||||
- }
|
||||
-
|
||||
- fd = qemu_open(path, open_flags, errp);
|
||||
+ /*
|
||||
+ * `path` can contain the path of a character device
|
||||
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
|
||||
+ *
|
||||
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
|
||||
+ * is not set in the open flags, because the exchange of IOCTL commands
|
||||
+ * for example will fail.
|
||||
+ *
|
||||
+ * In order to open the device read-only, we are using the `read-only`
|
||||
+ * property of the libblkio driver in blkio_file_open().
|
||||
+ */
|
||||
+ fd = qemu_open(path, O_RDWR, errp);
|
||||
if (fd < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,54 +0,0 @@
|
||||
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 12:37:44 +0200
|
||||
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
|
||||
notifications enabled from the start, but from the next releases
|
||||
this is no longer the case, so we have to explicitly enable them.
|
||||
|
||||
In fact, the libblkio documentation says they could be disabled,
|
||||
so we should always enable them at the start if we want to be
|
||||
sure to get completion eventfd notifications:
|
||||
|
||||
By default, the driver might not generate completion events for
|
||||
requests so it is necessary to explicitly enable the completion
|
||||
file descriptor before use:
|
||||
|
||||
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
|
||||
|
||||
I discovered this while trying a development version of libblkio:
|
||||
the guest kernel hangs during boot, while probing the device.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index afcec359f2..3ea9841bd8 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
QLIST_INIT(&s->bounce_bufs);
|
||||
s->blkioq = blkio_get_queue(s->blkio, 0);
|
||||
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
|
||||
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
|
||||
|
||||
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
||||
return 0;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:19 +0200
|
||||
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
|
||||
setting fails
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() fails if called with a unix domain socket in this way:
|
||||
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
|
||||
|
||||
Since virtio-blk-vhost-user does not support fd passing, let's always fall back
|
||||
on using `path` if we fail the fd passing.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 93a8f8fc5c..eef80e9ce5 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
* In order to open the device read-only, we are using the `read-only`
|
||||
* property of the libblkio driver in blkio_file_open().
|
||||
*/
|
||||
- fd = qemu_open(path, O_RDWR, errp);
|
||||
+ fd = qemu_open(path, O_RDWR, NULL);
|
||||
if (fd < 0) {
|
||||
- return -EINVAL;
|
||||
+ fd_supported = false;
|
||||
+ } else {
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ fd_supported = false;
|
||||
+ qemu_close(fd);
|
||||
+ }
|
||||
}
|
||||
+ }
|
||||
|
||||
- ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
- blkio_get_error_msg());
|
||||
- qemu_close(fd);
|
||||
- return ret;
|
||||
- }
|
||||
- } else {
|
||||
+ if (!fd_supported) {
|
||||
ret = blkio_set_str(s->blkio, "path", path);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,205 +0,0 @@
|
||||
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 4 Jul 2023 14:34:36 +0200
|
||||
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
|
||||
RH-Bugzilla: 2213317
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
When QEMU is built with --enable-modules, the module_block.py script
|
||||
parses block/*.c to find block drivers that are built as modules. The
|
||||
script generates a table of block drivers called block_driver_modules[].
|
||||
This table is used for block driver module loading.
|
||||
|
||||
The blkio.c driver uses macros to define its BlockDriver structs. This
|
||||
was done to avoid code duplication but the module_block.py script is
|
||||
unable to parse the macro. The result is that libblkio-based block
|
||||
drivers can be built as modules but will not be found at runtime.
|
||||
|
||||
One fix is to make the module_block.py script or build system fancier so
|
||||
it can parse C macros (e.g. by parsing the preprocessed source code). I
|
||||
chose not to do this because it raises the complexity of the build,
|
||||
making future issues harder to debug.
|
||||
|
||||
Keep things simple: use the macro to avoid duplicating BlockDriver
|
||||
function pointers but define .format_name and .protocol_name manually
|
||||
for each BlockDriver. This way the module_block.py is able to parse the
|
||||
code.
|
||||
|
||||
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
|
||||
because module_block.py cannot parse them either.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230704123436.187761-1-stefanha@redhat.com
|
||||
Cc: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
|
||||
|
||||
Conflicts:
|
||||
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
|
||||
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
|
||||
1 file changed, 61 insertions(+), 57 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 6a6f20f923..afcec359f2 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -21,16 +21,6 @@
|
||||
|
||||
#include "block/block-io.h"
|
||||
|
||||
-/*
|
||||
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
|
||||
- * Using macros instead of typing out the string literals avoids typos.
|
||||
- */
|
||||
-#define DRIVER_IO_URING "io_uring"
|
||||
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
|
||||
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
|
||||
-
|
||||
/*
|
||||
* Allocated bounce buffers are kept in a list sorted by buffer address.
|
||||
*/
|
||||
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
|
||||
* - truncate
|
||||
*/
|
||||
|
||||
-#define BLKIO_DRIVER(name, ...) \
|
||||
- { \
|
||||
- .format_name = name, \
|
||||
- .protocol_name = name, \
|
||||
- .instance_size = sizeof(BDRVBlkioState), \
|
||||
- .bdrv_file_open = blkio_file_open, \
|
||||
- .bdrv_close = blkio_close, \
|
||||
- .bdrv_co_getlength = blkio_co_getlength, \
|
||||
- .bdrv_co_truncate = blkio_truncate, \
|
||||
- .bdrv_co_get_info = blkio_co_get_info, \
|
||||
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
- .bdrv_co_preadv = blkio_co_preadv, \
|
||||
- .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
- .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
- .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
- .bdrv_register_buf = blkio_register_buf, \
|
||||
- .bdrv_unregister_buf = blkio_unregister_buf, \
|
||||
- __VA_ARGS__ \
|
||||
- }
|
||||
-
|
||||
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_IO_URING,
|
||||
- .bdrv_needs_filename = true,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_NVME_IO_URING,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VFIO_PCI
|
||||
-);
|
||||
+/*
|
||||
+ * Do not include .format_name and .protocol_name because module_block.py
|
||||
+ * does not parse macros in the source code.
|
||||
+ */
|
||||
+#define BLKIO_DRIVER_COMMON \
|
||||
+ .instance_size = sizeof(BDRVBlkioState), \
|
||||
+ .bdrv_file_open = blkio_file_open, \
|
||||
+ .bdrv_close = blkio_close, \
|
||||
+ .bdrv_co_getlength = blkio_co_getlength, \
|
||||
+ .bdrv_co_truncate = blkio_truncate, \
|
||||
+ .bdrv_co_get_info = blkio_co_get_info, \
|
||||
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
+ .bdrv_co_preadv = blkio_co_preadv, \
|
||||
+ .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
+ .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
+ .bdrv_register_buf = blkio_register_buf, \
|
||||
+ .bdrv_unregister_buf = blkio_unregister_buf,
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_USER
|
||||
-);
|
||||
+/*
|
||||
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
|
||||
+ * consistency.
|
||||
+ */
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_VDPA
|
||||
-);
|
||||
+static BlockDriver bdrv_io_uring = {
|
||||
+ .format_name = "io_uring",
|
||||
+ .protocol_name = "io_uring",
|
||||
+ .bdrv_needs_filename = true,
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_nvme_io_uring = {
|
||||
+ .format_name = "nvme-io_uring",
|
||||
+ .protocol_name = "nvme-io_uring",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
|
||||
+ .format_name = "virtio-blk-vfio-pci",
|
||||
+ .protocol_name = "virtio-blk-vfio-pci",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_user = {
|
||||
+ .format_name = "virtio-blk-vhost-user",
|
||||
+ .protocol_name = "virtio-blk-vhost-user",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
|
||||
+ .format_name = "virtio-blk-vhost-vdpa",
|
||||
+ .protocol_name = "virtio-blk-vhost-vdpa",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
|
||||
static void bdrv_blkio_init(void)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,151 +0,0 @@
|
||||
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:17 +0200
|
||||
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
|
||||
functions
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
This is in preparation for the next patch, where for virtio-blk
|
||||
drivers we need to handle the failure of blkio_connect().
|
||||
|
||||
Let's also rename the *_open() functions to *_connect() to make
|
||||
the code reflect the changes applied.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 40 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 5a82c6cb1a..85d1eed5fb 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *filename = qdict_get_str(options, "filename");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
- QDict *options, int flags, Error **errp)
|
||||
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
qdict_del(options, "path");
|
||||
|
||||
return 0;
|
||||
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
- ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else {
|
||||
- g_assert_not_reached();
|
||||
- }
|
||||
- if (ret < 0) {
|
||||
- blkio_destroy(&s->blkio);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_RDWR)) {
|
||||
ret = blkio_set_bool(s->blkio, "read-only", true);
|
||||
if (ret < 0) {
|
||||
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
- ret = blkio_connect(s->blkio);
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else {
|
||||
+ g_assert_not_reached();
|
||||
+ }
|
||||
if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
- blkio_get_error_msg());
|
||||
blkio_destroy(&s->blkio);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,85 +0,0 @@
|
||||
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:18 +0200
|
||||
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
|
||||
`fd`
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
|
||||
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
|
||||
qemu_open() to support fd passing for virtio-blk") we are using
|
||||
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
|
||||
for all the virtio-blk-* drivers.
|
||||
|
||||
Unfortunately that property is also available for those drivers that do
|
||||
not support it, such as virtio-blk-vhost-user.
|
||||
|
||||
So, `blkio_get_int()` is not enough to check whether the driver supports
|
||||
the `fd` property or not. This is because the virtio-blk common libblkio
|
||||
driver only checks whether or not `fd` is set during `blkio_connect()`
|
||||
and fails with -EINVAL for those transports that do not support it
|
||||
(all except vhost-vdpa for now).
|
||||
|
||||
So let's handle the `blkio_connect()` failure, retrying it using `path`
|
||||
directly.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 29 +++++++++++++++++++++++++++++
|
||||
1 file changed, 29 insertions(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 85d1eed5fb..93a8f8fc5c 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
|
||||
ret = blkio_connect(s->blkio);
|
||||
+ /*
|
||||
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
|
||||
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
|
||||
+ * directly setting `path`.
|
||||
+ */
|
||||
+ if (fd_supported && ret == -EINVAL) {
|
||||
+ qemu_close(fd);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to clear the `fd` property we set previously by setting
|
||||
+ * it to -1.
|
||||
+ */
|
||||
+ ret = blkio_set_int(s->blkio, "fd", -1);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ }
|
||||
+
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
blkio_get_error_msg());
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,49 +0,0 @@
|
||||
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:20 +0200
|
||||
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
|
||||
support
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Setting the `fd` property fails with virtio-blk-* libblkio drivers
|
||||
that do not support fd passing since
|
||||
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
|
||||
|
||||
Getting the `fd` property, on the other hand, always succeeds for
|
||||
virtio-blk-* libblkio drivers even when they don't support fd passing.
|
||||
|
||||
This patch switches to setting the `fd` property because it is a
|
||||
better mechanism for probing fd passing support than getting the `fd`
|
||||
property.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index eef80e9ce5..8defbf744f 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
|
||||
fd_supported = true;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,108 +0,0 @@
|
||||
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 30 May 2023 09:19:40 +0200
|
||||
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
|
||||
virtio-blk
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
|
||||
RH-Bugzilla: 2180076
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd
|
||||
passing. Let's expose this to the user, so the management layer
|
||||
can pass the file descriptor of an already opened path.
|
||||
|
||||
If the libblkio virtio-blk driver supports fd passing, let's always
|
||||
use qemu_open() to open the `path`, so we can handle fd passing
|
||||
from the management layer through the "/dev/fdset/N" special path.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 44 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 0cdc99a729..6a6f20f923 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
- int ret;
|
||||
+ bool fd_supported = false;
|
||||
+ int fd, ret;
|
||||
|
||||
if (!path) {
|
||||
error_setg(errp, "missing 'path' option");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- ret = blkio_set_str(s->blkio, "path", path);
|
||||
- qdict_del(options, "path");
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
- blkio_get_error_msg());
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_NOCACHE)) {
|
||||
error_setg(errp, "cache.direct=off is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
+
|
||||
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ fd_supported = true;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
|
||||
+ * to open the `path`, so we can handle fd passing from the management
|
||||
+ * layer through the "/dev/fdset/N" special path.
|
||||
+ */
|
||||
+ if (fd_supported) {
|
||||
+ int open_flags;
|
||||
+
|
||||
+ if (flags & BDRV_O_RDWR) {
|
||||
+ open_flags = O_RDWR;
|
||||
+ } else {
|
||||
+ open_flags = O_RDONLY;
|
||||
+ }
|
||||
+
|
||||
+ fd = qemu_open(path, open_flags, errp);
|
||||
+ if (fd < 0) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ qemu_close(fd);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qdict_del(options, "path");
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,121 +0,0 @@
|
||||
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 1 May 2023 13:34:43 -0400
|
||||
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
|
||||
default
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
reader_count() is a performance bottleneck because the global
|
||||
aio_context_list_lock mutex causes thread contention. Put this debugging
|
||||
assertion behind a new ./configure --enable-debug-graph-lock option and
|
||||
disable it by default.
|
||||
|
||||
The --enable-debug-graph-lock option is also enabled by the more general
|
||||
--enable-debug option.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 3 +++
|
||||
configure | 1 +
|
||||
meson.build | 2 ++
|
||||
meson_options.txt | 2 ++
|
||||
scripts/meson-buildoptions.sh | 4 ++++
|
||||
5 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 454c31e691..259a7a0bde 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
diff --git a/configure b/configure
|
||||
index 800b5850f4..a62a3e6be9 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -806,6 +806,7 @@ for opt do
|
||||
--enable-debug)
|
||||
# Enable debugging options that aren't excessively noisy
|
||||
debug_tcg="yes"
|
||||
+ meson_option_parse --enable-debug-graph-lock ""
|
||||
meson_option_parse --enable-debug-mutex ""
|
||||
meson_option_add -Doptimization=0
|
||||
fortify_source="no"
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..d964e741e7 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
|
||||
have_coroutine_pool = false
|
||||
endif
|
||||
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
|
||||
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
|
||||
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
|
||||
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
|
||||
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
|
||||
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
|
||||
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
|
||||
summary_info += {'malloc trim support': has_malloc_trim}
|
||||
summary_info += {'membarrier': have_membarrier}
|
||||
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
|
||||
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
|
||||
summary_info += {'mutex debugging': get_option('debug_mutex')}
|
||||
summary_info += {'memory allocator': get_option('malloc')}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..bc857fe68b 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
|
||||
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
|
||||
option('coroutine_pool', type: 'boolean', value: true,
|
||||
description: 'coroutine freelist (better performance)')
|
||||
+option('debug_graph_lock', type: 'boolean', value: false,
|
||||
+ description: 'graph lock debugging support')
|
||||
option('debug_mutex', type: 'boolean', value: false,
|
||||
description: 'mutex debugging support')
|
||||
option('debug_stack_usage', type: 'boolean', value: false,
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..30e1f25259 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -21,6 +21,8 @@ meson_options_help() {
|
||||
printf "%s\n" ' QEMU'
|
||||
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
|
||||
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
|
||||
+ printf "%s\n" ' --enable-debug-graph-lock'
|
||||
+ printf "%s\n" ' graph lock debugging support'
|
||||
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
|
||||
printf "%s\n" ' --enable-debug-stack-usage'
|
||||
printf "%s\n" ' measure coroutine stack usage'
|
||||
@@ -249,6 +251,8 @@ _meson_option_parse() {
|
||||
--datadir=*) quote_sh "-Ddatadir=$2" ;;
|
||||
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
|
||||
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
|
||||
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
|
||||
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
|
||||
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
|
||||
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
|
||||
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,69 @@
|
||||
From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 12 Sep 2023 19:10:37 -0400
|
||||
Subject: [PATCH 095/101] block-coroutine-wrapper: use
|
||||
qemu_get_current_aio_context()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Use qemu_get_current_aio_context() in mixed wrappers and coroutine
|
||||
wrappers so that code runs in the caller's AioContext instead of moving
|
||||
to the BlockDriverState's AioContext. This change is necessary for the
|
||||
multi-queue block layer where any thread can call into the block layer.
|
||||
|
||||
Most wrappers are IO_CODE where it's safe to use the current AioContext
|
||||
nowadays. BlockDrivers and the core block layer use their own locks and
|
||||
no longer depend on the AioContext lock for thread-safety.
|
||||
|
||||
The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current
|
||||
AioContext is safe because this code is only called with the BQL held
|
||||
from the main loop thread.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230912231037.826804-6-stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
scripts/block-coroutine-wrapper.py | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
|
||||
index c9c09fcacd..dbbde99e39 100644
|
||||
--- a/scripts/block-coroutine-wrapper.py
|
||||
+++ b/scripts/block-coroutine-wrapper.py
|
||||
@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
|
||||
f"{self.name}")
|
||||
self.target_name = f'{subsystem}_{subname}'
|
||||
|
||||
- self.ctx = self.gen_ctx()
|
||||
-
|
||||
self.get_result = 's->ret = '
|
||||
self.ret = 'return s.ret;'
|
||||
self.co_ret = 'return '
|
||||
@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
|
||||
{func.co_ret}{name}({ func.gen_list('{name}') });
|
||||
}} else {{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
|
||||
{func.return_type} {func.name}({ func.gen_list('{decl}') })
|
||||
{{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,217 @@
|
||||
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 14 Sep 2023 10:00:58 -0400
|
||||
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
|
||||
the current thread
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The file-posix block driver currently only sets up Linux AIO and
|
||||
io_uring in the BDS's AioContext. In the multi-queue block layer we must
|
||||
be able to submit I/O requests in AioContexts that do not have Linux AIO
|
||||
and io_uring set up yet since any thread can call into the block driver.
|
||||
|
||||
Set up Linux AIO and io_uring for the current AioContext during request
|
||||
submission. We lose the ability to return an error from
|
||||
.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to
|
||||
resource limits). Instead the user only gets warnings and we fall back
|
||||
to aio=threads. This is still better than a fatal error after startup.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
|
||||
1 file changed, 51 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b862406c71..35684f7e21 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
/* Currently Linux does AIO only for files opened with O_DIRECT */
|
||||
- if (s->use_linux_aio) {
|
||||
- if (!(s->open_flags & O_DIRECT)) {
|
||||
- error_setg(errp, "aio=native was specified, but it requires "
|
||||
- "cache.direct=on, which was not specified.");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use native AIO: ");
|
||||
- goto fail;
|
||||
- }
|
||||
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
|
||||
+ error_setg(errp, "aio=native was specified, but it requires "
|
||||
+ "cache.direct=on, which was not specified.");
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
}
|
||||
#else
|
||||
if (s->use_linux_aio) {
|
||||
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||||
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use io_uring: ");
|
||||
- goto fail;
|
||||
- }
|
||||
- }
|
||||
-#else
|
||||
+#ifndef CONFIG_LINUX_IO_URING
|
||||
if (s->use_linux_io_uring) {
|
||||
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||||
"in this build.");
|
||||
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
return true;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LINUX_IO_URING
|
||||
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_io_uring) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_io_uring = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_LINUX_AIO
|
||||
+static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_aio) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_aio = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||||
type |= QEMU_AIO_MISALIGNED;
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- } else if (s->use_linux_io_uring) {
|
||||
+ } else if (raw_check_linux_io_uring(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
|
||||
goto out;
|
||||
#endif
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
- } else if (s->use_linux_aio) {
|
||||
+ } else if (raw_check_linux_aio(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = laio_co_submit(s->fd, offset, qiov, type,
|
||||
s->aio_max_batch);
|
||||
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||
};
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
+ if (raw_check_linux_io_uring(s)) {
|
||||
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
|
||||
}
|
||||
#endif
|
||||
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
|
||||
}
|
||||
|
||||
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
|
||||
- AioContext *new_context)
|
||||
-{
|
||||
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
|
||||
-#ifdef CONFIG_LINUX_AIO
|
||||
- if (s->use_linux_aio) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_aio(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use native AIO, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_aio = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_io_uring = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-}
|
||||
-
|
||||
static void raw_close(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,97 @@
|
||||
From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:04 -0500
|
||||
Subject: [PATCH 087/101] block: remove bdrv_co_lock()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops.
|
||||
Remove them.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-8-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 10 ----------
|
||||
blockdev.c | 5 -----
|
||||
include/block/block-global-state.h | 14 --------------
|
||||
3 files changed, 29 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 91ace5d2d5..434b7f4d72 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
|
||||
bdrv_dec_in_flight(bs);
|
||||
}
|
||||
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 5d8b3a23eb..3a5e7222ec 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_begin(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
|
||||
old_ctx = bdrv_co_enter(bs);
|
||||
blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
|
||||
bdrv_co_leave(bs, old_ctx);
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
-
|
||||
blk_co_unref(blk);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 0327f1c605..4ec0b217f0 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
|
||||
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
|
||||
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
|
||||
|
||||
-/**
|
||||
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
|
||||
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
|
||||
- * know we already own the lock of the current AioContext.
|
||||
- *
|
||||
- * May only be called in the main thread.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
|
||||
-
|
||||
-/**
|
||||
- * Unlocks the AioContext of @bs if it's not the current AioContext.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
|
||||
-
|
||||
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
|
||||
GHashTable *visited, Transaction *tran,
|
||||
Error **errp);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,411 @@
|
||||
From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:11 -0500
|
||||
Subject: [PATCH 094/101] block: remove outdated AioContext locking comments
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The AioContext lock no longer exists.
|
||||
|
||||
There is one noteworthy change:
|
||||
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
|
||||
I am not sure whether deadlocks are still possible. Maybe they have just
|
||||
moved to the fine-grained locks that have replaced the AioContext. Since
|
||||
I am not sure if the deadlocks are gone, I have kept the substance
|
||||
unchanged and just removed mention of the AioContext.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-15-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 73 ++++++----------------------
|
||||
block/block-backend.c | 8 ---
|
||||
block/export/vhost-user-blk-server.c | 4 --
|
||||
include/block/block-common.h | 3 --
|
||||
include/block/block-io.h | 9 ++--
|
||||
include/block/block_int-common.h | 2 -
|
||||
tests/qemu-iotests/202 | 2 +-
|
||||
tests/qemu-iotests/203 | 3 +-
|
||||
8 files changed, 22 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 434b7f4d72..a097772238 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1616,11 +1616,6 @@ out:
|
||||
g_free(gen_node_name);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * The caller must always hold @bs AioContext lock, because this function calls
|
||||
- * bdrv_refresh_total_sectors() which polls when called from non-coroutine
|
||||
- * context.
|
||||
- */
|
||||
static int no_coroutine_fn GRAPH_UNLOCKED
|
||||
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
|
||||
QDict *options, int open_flags, Error **errp)
|
||||
@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||
* Replaces the node that a BdrvChild points to without updating permissions.
|
||||
*
|
||||
* If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||
- * @child and the caller must hold the AioContext lock for @new_bs.
|
||||
+ * @child.
|
||||
*/
|
||||
static void GRAPH_WRLOCK
|
||||
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
|
||||
@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
|
||||
*
|
||||
* Returns new created child.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*/
|
||||
static BdrvChild * GRAPH_WRLOCK
|
||||
bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
/*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
|
||||
- * (unless @child_bs is already in @ctx).
|
||||
*/
|
||||
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
|
||||
const char *child_name,
|
||||
@@ -3226,9 +3216,6 @@ out:
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
|
||||
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
|
||||
*/
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
|
||||
*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3513,9 +3499,8 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
- * The caller must hold the AioContext lock for @backing_hd. Both @bs and
|
||||
- * @backing_hd can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @bs and @backing_hd can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* If a backing child is already present (i.e. we're detaching a node), that
|
||||
* child node must be drained.
|
||||
@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
||||
* itself, all options starting with "${bdref_key}." are considered part of the
|
||||
* BlockdevRef.
|
||||
*
|
||||
- * The caller must hold the main AioContext lock.
|
||||
- *
|
||||
* TODO Can this be unified with bdrv_open_image()?
|
||||
*/
|
||||
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||
@@ -3745,9 +3728,7 @@ done:
|
||||
*
|
||||
* The BlockdevRef will be removed from the options QDict.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
BdrvChild *bdrv_open_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
/*
|
||||
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
int bdrv_open_file_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3923,8 +3902,6 @@ out:
|
||||
* The reference parameter may be used to specify an existing block device which
|
||||
* should be opened. If specified, neither options nor a filename may be given,
|
||||
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
|
||||
- *
|
||||
- * The caller must always hold the main AioContext lock.
|
||||
*/
|
||||
static BlockDriverState * no_coroutine_fn
|
||||
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
@@ -4217,7 +4194,6 @@ close_and_fail:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-/* The caller must always hold the main AioContext lock. */
|
||||
BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||
QDict *options, int flags, Error **errp)
|
||||
{
|
||||
@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
|
||||
*
|
||||
* Return 0 on success, otherwise return < 0 and set @errp.
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
* @reopen_state->bs can move to a different AioContext in this function.
|
||||
- * Callers must make sure that their AioContext locking is still correct after
|
||||
- * this.
|
||||
*/
|
||||
static int GRAPH_UNLOCKED
|
||||
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
|
||||
@@ -4801,8 +4774,6 @@ out_rdlock:
|
||||
* It is the responsibility of the caller to then call the abort() or
|
||||
* commit() for any other BDS that have been left in a prepare() state
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
- *
|
||||
* After calling this function, the transaction @change_child_tran may only be
|
||||
* completed while holding a writer lock for the graph.
|
||||
*/
|
||||
@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
* child.
|
||||
*
|
||||
* This function does not create any image files.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for @bs_top.
|
||||
*/
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
Error **errp)
|
||||
@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs)
|
||||
* after the call (even on failure), so if the caller intends to reuse the
|
||||
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
||||
*
|
||||
- * The caller holds the AioContext lock for @bs. It must make sure that @bs
|
||||
- * stays in the same AioContext, i.e. @options must not refer to nodes in a
|
||||
- * different AioContext.
|
||||
+ * The caller must make sure that @bs stays in the same AioContext, i.e.
|
||||
+ * @options must not refer to nodes in a different AioContext.
|
||||
*/
|
||||
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
|
||||
int flags, Error **errp)
|
||||
@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = {
|
||||
*
|
||||
* Must be called from the main AioContext.
|
||||
*
|
||||
- * The caller must own the AioContext lock for the old AioContext of bs, but it
|
||||
- * must not own the AioContext lock for new_context (unless new_context is the
|
||||
- * same as the current context of bs).
|
||||
- *
|
||||
* @visited will accumulate all visited BdrvChild objects. The caller is
|
||||
* responsible for freeing the list afterwards.
|
||||
*/
|
||||
@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
*
|
||||
* If ignore_child is not NULL, that child (and its subgraph) will not
|
||||
* be touched.
|
||||
- *
|
||||
- * This function still requires the caller to take the bs current
|
||||
- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
|
||||
- * assumes the lock is always held if bs is in another AioContext.
|
||||
- * For the same reason, it temporarily also holds the new AioContext, since
|
||||
- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
|
||||
- * Therefore the new AioContext lock must not be taken by the caller.
|
||||
*/
|
||||
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
BdrvChild *ignore_child, Error **errp)
|
||||
@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
|
||||
/*
|
||||
* Linear phase: go through all callbacks collected in the transaction.
|
||||
- * Run all callbacks collected in the recursion to switch all nodes
|
||||
- * AioContext lock (transaction commit), or undo all changes done in the
|
||||
+ * Run all callbacks collected in the recursion to switch every node's
|
||||
+ * AioContext (transaction commit), or undo all changes done in the
|
||||
* recursion (transaction abort).
|
||||
*/
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index f412bed274..209eb07528 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
|
||||
* Both sets of permissions can be changed later using blk_set_perm().
|
||||
*
|
||||
* Return the new BlockBackend on success, null on failure.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
uint64_t shared_perm, Error **errp)
|
||||
@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
* Just as with bdrv_open(), after having called this function the reference to
|
||||
* @options belongs to the block layer (even on failure).
|
||||
*
|
||||
- * Called without holding an AioContext lock.
|
||||
- *
|
||||
* TODO: Remove @filename and @flags; it should be possible to specify a whole
|
||||
* BDS tree just by specifying the @options QDict (or @reference,
|
||||
* alternatively). At the time of adding this function, this is not possible,
|
||||
@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
|
||||
|
||||
/*
|
||||
* Disassociates the currently associated BlockDriverState from @blk.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for the BlockBackend.
|
||||
*/
|
||||
void blk_remove_bs(BlockBackend *blk)
|
||||
{
|
||||
@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk)
|
||||
|
||||
/*
|
||||
* Associates a new BlockDriverState with @blk.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
|
||||
{
|
||||
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
|
||||
index 16f48388d3..50c358e8cd 100644
|
||||
--- a/block/export/vhost-user-blk-server.c
|
||||
+++ b/block/export/vhost-user-blk-server.c
|
||||
@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque)
|
||||
vu_config_change_msg(&vexp->vu_server.vu_dev);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.ctx acquired */
|
||||
static void vu_blk_drained_begin(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque)
|
||||
vhost_user_server_detach_aio_context(&vexp->vu_server);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.blk AioContext acquired */
|
||||
static void vu_blk_drained_end(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque)
|
||||
* Ensures that bdrv_drained_begin() waits until in-flight requests complete
|
||||
* and the server->co_trip coroutine has terminated. It will be restarted in
|
||||
* vhost_user_server_attach_aio_context().
|
||||
- *
|
||||
- * Called with vexp->export.ctx acquired.
|
||||
*/
|
||||
static bool vu_blk_drained_poll(void *opaque)
|
||||
{
|
||||
diff --git a/include/block/block-common.h b/include/block/block-common.h
|
||||
index d7599564db..a846023a09 100644
|
||||
--- a/include/block/block-common.h
|
||||
+++ b/include/block/block-common.h
|
||||
@@ -70,9 +70,6 @@
|
||||
* automatically takes the graph rdlock when calling the wrapped function. In
|
||||
* the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
|
||||
* graph wrlock.
|
||||
- *
|
||||
- * If the first parameter of the function is a BlockDriverState, BdrvChild or
|
||||
- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
|
||||
*/
|
||||
#define no_co_wrapper
|
||||
#define no_co_wrapper_bdrv_rdlock
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 8eb39a858b..b49e0537dd 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||
* "I/O or GS" API functions. These functions can run without
|
||||
* the BQL, but only in one specific iothread/main loop.
|
||||
*
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
*
|
||||
* Therefore, these functions are not proper I/O, because they
|
||||
* can't run in *any* iothreads, but only in a specific one.
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 4e31d161c5..151279d481 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -1192,8 +1192,6 @@ struct BlockDriverState {
|
||||
/* The error object in use for blocking operations on backing_hd */
|
||||
Error *backing_blocker;
|
||||
|
||||
- /* Protected by AioContext lock */
|
||||
-
|
||||
/*
|
||||
* If we are reading a disk image, give its size in sectors.
|
||||
* Generally read-only; it is written to by load_snapshot and
|
||||
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
|
||||
index b784dcd791..13304242e5 100755
|
||||
--- a/tests/qemu-iotests/202
|
||||
+++ b/tests/qemu-iotests/202
|
||||
@@ -21,7 +21,7 @@
|
||||
# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
|
||||
# single IOThread completes successfully. This particular command triggered a
|
||||
# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
|
||||
-# against regressions.
|
||||
+# against regressions even though the AioContext lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
|
||||
index ab80fd0e44..1ba878522b 100755
|
||||
--- a/tests/qemu-iotests/203
|
||||
+++ b/tests/qemu-iotests/203
|
||||
@@ -21,7 +21,8 @@
|
||||
# Check that QMP 'migrate' with multiple drives on a single IOThread completes
|
||||
# successfully. This particular command triggered a hang in the source QEMU
|
||||
# process due to recursive AioContext locking in bdrv_invalidate_all() and
|
||||
-# BDRV_POLL_WHILE().
|
||||
+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext
|
||||
+# lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit ef56ffbdd6b0605dc1e305611287b948c970e236
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:08 2023 -0400
|
||||
|
||||
checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
Advise authors to use the _guarded versions of the APIs, instead.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
scripts/checkpatch.pl | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
|
||||
index d768171dcf..eeaec436eb 100755
|
||||
--- a/scripts/checkpatch.pl
|
||||
+++ b/scripts/checkpatch.pl
|
||||
@@ -2865,6 +2865,14 @@ sub process {
|
||||
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
|
||||
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
|
||||
}
|
||||
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
|
||||
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
+# recommend aio_bh_new_guarded instead of aio_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
|
||||
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
# check for module_init(), use category-specific init macros explicitly please
|
||||
if ($line =~ /^module_init\s*\(/) {
|
||||
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,75 @@
|
||||
From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 4 Dec 2023 11:42:59 -0500
|
||||
Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
|
||||
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
|
||||
to avoid a race with scsi_device_purge_requests() running in the main
|
||||
loop thread.
|
||||
|
||||
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
|
||||
while I/O is running in the IOThread AioContext. Therefore it is no
|
||||
longer necessary to take this lock to protect DMAAIOCB fields. The
|
||||
->cb() function also does not require the lock because blk_aio_*() and
|
||||
friends do not need the AioContext lock.
|
||||
|
||||
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
|
||||
rely on it taking the AioContext lock, so this change is safe.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231204164259.1515217-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
system/dma-helpers.c | 7 ++-----
|
||||
1 file changed, 2 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
|
||||
index 36211acc7e..528117f256 100644
|
||||
--- a/system/dma-helpers.c
|
||||
+++ b/system/dma-helpers.c
|
||||
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
- aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
trace_dma_map_wait(dbs);
|
||||
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
assert(dbs->acb);
|
||||
-out:
|
||||
- aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,228 @@
|
||||
From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001
|
||||
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:26 +0800
|
||||
Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 1 +
|
||||
docs/devel/index-internals.rst | 1 +
|
||||
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
|
||||
3 files changed, 168 insertions(+)
|
||||
create mode 100644 docs/devel/vfio-iommufd.rst
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index ca70bb4e64..0ddb20a35f 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
|
||||
F: include/sysemu/iommufd.h
|
||||
F: include/qemu/chardev_open.h
|
||||
F: util/chardev_open.c
|
||||
+F: docs/devel/vfio-iommufd.rst
|
||||
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
|
||||
index 6f81df92bc..3def4a138b 100644
|
||||
--- a/docs/devel/index-internals.rst
|
||||
+++ b/docs/devel/index-internals.rst
|
||||
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
|
||||
s390-dasd-ipl
|
||||
tracing
|
||||
vfio-migration
|
||||
+ vfio-iommufd
|
||||
writing-monitor-commands
|
||||
virtio-backends
|
||||
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
|
||||
new file mode 100644
|
||||
index 0000000000..3d1c11f175
|
||||
--- /dev/null
|
||||
+++ b/docs/devel/vfio-iommufd.rst
|
||||
@@ -0,0 +1,166 @@
|
||||
+===============================
|
||||
+IOMMUFD BACKEND usage with VFIO
|
||||
+===============================
|
||||
+
|
||||
+(Same meaning for backend/container/BE)
|
||||
+
|
||||
+With the introduction of iommufd, the Linux kernel provides a generic
|
||||
+interface for user space drivers to propagate their DMA mappings to kernel
|
||||
+for assigned devices. While the legacy kernel interface is group-centric,
|
||||
+the new iommufd interface is device-centric, relying on device fd and iommufd.
|
||||
+
|
||||
+To support both interfaces in the QEMU VFIO device, introduce a base container
|
||||
+to abstract the common part of VFIO legacy and iommufd container. So that the
|
||||
+generic VFIO code can use either container.
|
||||
+
|
||||
+The base container implements generic functions such as memory_listener and
|
||||
+address space management whereas the derived container implements callbacks
|
||||
+specific to either legacy or iommufd. Each container has its own way to setup
|
||||
+secure context and dma management interface. The below diagram shows how it
|
||||
+looks like with both containers.
|
||||
+
|
||||
+::
|
||||
+
|
||||
+ VFIO AddressSpace/Memory
|
||||
+ +-------+ +----------+ +-----+ +-----+
|
||||
+ | pci | | platform | | ap | | ccw |
|
||||
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
|
||||
+ | | | | | AddressSpace |
|
||||
+ | | | | +------------+---------+
|
||||
+ +---V-----------V-----------V--------V----+ /
|
||||
+ | VFIOAddressSpace | <------------+
|
||||
+ | | | MemoryListener
|
||||
+ | VFIOContainerBase list |
|
||||
+ +-------+----------------------------+----+
|
||||
+ | |
|
||||
+ | |
|
||||
+ +-------V------+ +--------V----------+
|
||||
+ | iommufd | | vfio legacy |
|
||||
+ | container | | container |
|
||||
+ +-------+------+ +--------+----------+
|
||||
+ | |
|
||||
+ | /dev/iommu | /dev/vfio/vfio
|
||||
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
|
||||
+ Userspace | |
|
||||
+ ============+============================+===========================
|
||||
+ Kernel | device fd |
|
||||
+ +---------------+ | group/container fd
|
||||
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
|
||||
+ | ATTACH_IOAS) | | device fd
|
||||
+ | | |
|
||||
+ | +-------V------------V-----------------+
|
||||
+ iommufd | | vfio |
|
||||
+ (map/unmap | +---------+--------------------+-------+
|
||||
+ ioas_copy) | | | map/unmap
|
||||
+ | | |
|
||||
+ +------V------+ +-----V------+ +------V--------+
|
||||
+ | iommfd core | | device | | vfio iommu |
|
||||
+ +-------------+ +------------+ +---------------+
|
||||
+
|
||||
+* Secure Context setup
|
||||
+
|
||||
+ - iommufd BE: uses device fd and iommufd to setup secure context
|
||||
+ (bind_iommufd, attach_ioas)
|
||||
+ - vfio legacy BE: uses group fd and container fd to setup secure context
|
||||
+ (set_container, set_iommu)
|
||||
+
|
||||
+* Device access
|
||||
+
|
||||
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
|
||||
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
|
||||
+
|
||||
+* DMA Mapping flow
|
||||
+
|
||||
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
|
||||
+ 2. VFIO populates DMA map/unmap via the container BEs
|
||||
+ * iommufd BE: uses iommufd
|
||||
+ * vfio legacy BE: uses container fd
|
||||
+
|
||||
+Example configuration
|
||||
+=====================
|
||||
+
|
||||
+Step 1: configure the host device
|
||||
+---------------------------------
|
||||
+
|
||||
+It's exactly the same as for a VFIO device with the legacy VFIO container.
|
||||
+
|
||||
+Step 2: configure QEMU
|
||||
+----------------------
|
||||
+
|
||||
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
|
||||
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
|
||||
+
|
||||
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
|
||||
+be linked with an iommufd object. It gets a new optional property
|
||||
+named ``iommufd`` which allows passing an iommufd object. Take ``vfio-pci``
|
||||
+device for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||
+
|
||||
+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
|
||||
+management layer. In that case the fd is passed in; ``fd`` accepts a
|
||||
+string naming the fd or a number, for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0,fd=22
|
||||
+ -device vfio-pci,iommufd=iommufd0,fd=23
|
||||
+
|
||||
+If the ``fd`` property is not passed, the fd is opened by QEMU.
|
||||
+
|
||||
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
|
||||
+is not used and the user gets the behavior based on the legacy VFIO
|
||||
+container:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -device vfio-pci,host=0000:02:00.0
|
||||
+
|
||||
+Supported platform
|
||||
+==================
|
||||
+
|
||||
+Supports x86, ARM and s390x currently.
|
||||
+
|
||||
+Caveats
|
||||
+=======
|
||||
+
|
||||
+Dirty page sync
|
||||
+---------------
|
||||
+
|
||||
+Dirty page sync with iommufd backend is not supported yet, so live migration is
|
||||
+disabled by default. It can be force-enabled as shown below, at low efficiency
|
||||
+though.
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
|
||||
+
|
||||
+P2P DMA
|
||||
+-------
|
||||
+
|
||||
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
|
||||
+BAR region yet. The warning below shows for an assigned PCI device; it's not a bug.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
|
||||
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
|
||||
+
|
||||
+FD passing with mdev
|
||||
+--------------------
|
||||
+
|
||||
+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
|
||||
+If FD passing is used, there is no way to know that and the mdev is treated
|
||||
+like a real PCI device. There is an error as below if user wants to enable
|
||||
+RAM discarding for mdev.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
|
||||
+
|
||||
+``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue, as their backend
|
||||
+devices are always mdev and RAM discarding is force enabled.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,98 @@
|
||||
From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:08 -0500
|
||||
Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Encourage the use of locking primitives and stop mentioning the
|
||||
AioContext lock since it is being removed.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-12-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 47 +++++++++++--------------------
|
||||
1 file changed, 16 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index a3e949f6b3..4865196bde 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in.
|
||||
|
||||
How to synchronize with an IOThread
|
||||
-----------------------------------
|
||||
-AioContext is not thread-safe so some rules must be followed when using file
|
||||
-descriptors, event notifiers, timers, or BHs across threads:
|
||||
+Variables that can be accessed by multiple threads require some form of
|
||||
+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc.
|
||||
|
||||
-1. AioContext functions can always be called safely. They handle their
|
||||
-own locking internally.
|
||||
-
|
||||
-2. Other threads wishing to access the AioContext must use
|
||||
-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
|
||||
-context is acquired no other thread can access it or run event loop iterations
|
||||
-in this AioContext.
|
||||
-
|
||||
-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
|
||||
-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
|
||||
-used in the block layer and can lead to hangs.
|
||||
-
|
||||
-There is currently no lock ordering rule if a thread needs to acquire multiple
|
||||
-AioContexts simultaneously. Therefore, it is only safe for code holding the
|
||||
-QEMU global mutex to acquire other AioContexts.
|
||||
+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(),
|
||||
+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger
|
||||
+activity in an IOThread.
|
||||
|
||||
Side note: the best way to schedule a function call across threads is to call
|
||||
-aio_bh_schedule_oneshot(). No acquire/release or locking is needed.
|
||||
+aio_bh_schedule_oneshot().
|
||||
+
|
||||
+The main loop thread can wait synchronously for a condition using
|
||||
+AIO_WAIT_WHILE().
|
||||
|
||||
AioContext and the block layer
|
||||
------------------------------
|
||||
@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using
|
||||
old APIs that implicitly use the main loop. See the "How to program for
|
||||
IOThreads" above for information on how to do that.
|
||||
|
||||
-If main loop code such as a QMP function wishes to access a BlockDriverState
|
||||
-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
|
||||
-that callbacks in the IOThread do not run in parallel.
|
||||
-
|
||||
Code running in the monitor typically needs to ensure that past
|
||||
requests from the guest are completed. When a block device is running
|
||||
in an IOThread, the IOThread can also process requests from the guest
|
||||
(via ioeventfd). To achieve both objects, wrap the code between
|
||||
bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
|
||||
-section". The functions must be called between aio_context_acquire()
|
||||
-and aio_context_release(). You can freely release and re-acquire the
|
||||
-AioContext within a drained section.
|
||||
-
|
||||
-Long-running jobs (usually in the form of coroutines) are best scheduled in
|
||||
-the BlockDriverState's AioContext to avoid the need to acquire/release around
|
||||
-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
|
||||
-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
|
||||
-can be used to get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
+section".
|
||||
+
|
||||
+Long-running jobs (usually in the form of coroutines) are often scheduled in
|
||||
+the BlockDriverState's AioContext. The functions
|
||||
+bdrv_add/remove_aio_context_notifier, or alternatively
|
||||
+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to
|
||||
+get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
BlockDriverState to a different AioContext.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,153 +0,0 @@
|
||||
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:32 +0200
|
||||
Subject: [PATCH 02/21] graph-lock: Disable locking for now
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
|
||||
come from callers that hold an AioContext lock, which is not allowed
|
||||
during polling. In theory, we could temporarily release the lock, but
|
||||
callers are inconsistent about whether they hold a lock, and if they do,
|
||||
some are also confused about which one they hold. While all of this is
|
||||
fixable, it's not trivial, and the best course of action for 8.0.1 is
|
||||
probably just disabling the graph locking code temporarily.
|
||||
|
||||
We don't currently rely on graph locking yet. It is supposed to replace
|
||||
the AioContext lock eventually to enable multiqueue support, but as long
|
||||
as we still have the AioContext lock, it is sufficient without the graph
|
||||
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
|
||||
more either and this commit can be reverted. (Of course, it can also be
|
||||
reverted while the AioContext lock still exists if the callers have been
|
||||
fixed.)
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 259a7a0bde..2490926c90 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
|
||||
/* Protects the list of aiocontext and orphaned_reader_count */
|
||||
static QemuMutex aio_context_list_lock;
|
||||
|
||||
+#if 0
|
||||
/* Written and read with atomic operations. */
|
||||
static int has_writer;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* A reader coroutine could move from an AioContext to another.
|
||||
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
|
||||
g_free(ctx->bdrv_graph);
|
||||
}
|
||||
|
||||
+#if 0
|
||||
static uint32_t reader_count(void)
|
||||
{
|
||||
BdrvGraphRWlock *brdv_graph;
|
||||
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
|
||||
assert((int32_t)rd >= 0);
|
||||
return rd;
|
||||
}
|
||||
+#endif
|
||||
|
||||
void bdrv_graph_wrlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+ /*
|
||||
+ * TODO Some callers hold an AioContext lock when this is called, which
|
||||
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
|
||||
+ * AioContext locks are gone).
|
||||
+ */
|
||||
+#if 0
|
||||
assert(!qatomic_read(&has_writer));
|
||||
|
||||
/* Make sure that constantly arriving new I/O doesn't cause starvation */
|
||||
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
|
||||
} while (reader_count() >= 1);
|
||||
|
||||
bdrv_drain_all_end();
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_wrunlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+#if 0
|
||||
QEMU_LOCK_GUARD(&aio_context_list_lock);
|
||||
assert(qatomic_read(&has_writer));
|
||||
|
||||
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
|
||||
|
||||
/* Wake up all coroutine that are waiting to read the graph */
|
||||
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
{
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
{
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
if (qatomic_read(&has_writer)) {
|
||||
aio_wait_kick();
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_rdlock_main_loop(void)
|
||||
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
/* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
{
|
||||
assert(qemu_in_main_thread());
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
assert(qatomic_read(&has_writer));
|
||||
+#endif
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,94 @@
|
||||
From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001
|
||||
From: David Hildenbrand <david@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 14:55:53 +0100
|
||||
Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB
|
||||
alignment
|
||||
|
||||
RH-Author: David Hildenbrand <david@redhat.com>
|
||||
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
|
||||
RH-Jira: RHEL-20341
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825
|
||||
|
||||
https://issues.redhat.com/browse/RHEL-20341
|
||||
|
||||
Let's implement the get_min_alignment() callback for memory devices, and
|
||||
copy for the device memory region the alignment of the host memory
|
||||
region. This mimics what virtio-mem does, and allows for re-introducing
|
||||
proper alignment checks for the memory region size (where we don't care
|
||||
about additional device requirements) in memory device core.
|
||||
|
||||
Message-ID: <20240117135554.787344-2-david@redhat.com>
|
||||
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414)
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
---
|
||||
hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++----------------
|
||||
1 file changed, 21 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
|
||||
index 66f297c1d7..0829c495b0 100644
|
||||
--- a/hw/hyperv/hv-balloon.c
|
||||
+++ b/hw/hyperv/hv-balloon.c
|
||||
@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
|
||||
balloon->mr = g_new0(MemoryRegion, 1);
|
||||
memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
|
||||
memory_region_size(hostmem_mr));
|
||||
-
|
||||
- /*
|
||||
- * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
- * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
- * specify a larger maxmem eventually.
|
||||
- *
|
||||
- * The memory device core will warn the user in case maxmem might have to be
|
||||
- * increased and will fail plugging the device if there is not sufficient
|
||||
- * space after alignment.
|
||||
- *
|
||||
- * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
- * But this feels better, although the warning might be annoying. Maybe
|
||||
- * we can optimize that in the future (e.g., with such a device on the
|
||||
- * cmdline place/size the device memory region differently.
|
||||
- */
|
||||
- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
|
||||
+ balloon->mr->align = memory_region_get_alignment(hostmem_mr);
|
||||
}
|
||||
|
||||
static void hv_balloon_free_mr(HvBalloon *balloon)
|
||||
@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
|
||||
return balloon->mr;
|
||||
}
|
||||
|
||||
+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
|
||||
+{
|
||||
+ /*
|
||||
+ * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
+ * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
+ * specify a larger maxmem eventually.
|
||||
+ *
|
||||
+ * The memory device core will warn the user in case maxmem might have to be
|
||||
+ * increased and will fail plugging the device if there is not sufficient
|
||||
+ * space after alignment.
|
||||
+ *
|
||||
+ * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
+ * But this feels better, although the warning might be annoying. Maybe
|
||||
+ * we can optimize that in the future (e.g., with such a device on the
|
||||
+ * cmdline place/size the device memory region differently.
|
||||
+ */
|
||||
+ return 32 * GiB;
|
||||
+}
|
||||
+
|
||||
static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
|
||||
MemoryDeviceInfo *info)
|
||||
{
|
||||
@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data)
|
||||
mdc->get_memory_region = hv_balloon_md_get_memory_region;
|
||||
mdc->decide_memslots = hv_balloon_decide_memslots;
|
||||
mdc->get_memslots = hv_balloon_get_memslots;
|
||||
+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
|
||||
mdc->fill_device_info = hv_balloon_md_fill_device_info;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,40 +0,0 @@
|
||||
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Tue, 2 May 2023 15:51:53 +0530
|
||||
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
|
||||
version 7.6 and above
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
|
||||
|
||||
Please look at QEMU upstream commit
|
||||
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
|
||||
This patch adapts the above change so that it applies to RHEL pc machines of
|
||||
version 7.6 and newer. These are the machine types that are currently supported
|
||||
in RHEL. Q35 machines are not affected.
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 4d5880e249..6c7be628e1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
|
||||
pcmc->default_nic_model = "e1000";
|
||||
pcmc->pci_root_uid = 0;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
m->default_display = "std";
|
||||
m->no_parallel = 1;
|
||||
m->numa_mem_supported = true;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,101 +0,0 @@
|
||||
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Wed, 29 Mar 2023 10:27:26 +0530
|
||||
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
|
||||
older than version 2.3
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
|
||||
|
||||
i440fx machine versions 2.3 and newer supports dynamic ram
|
||||
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") .
|
||||
Currently supported all q35 machine types (versions 2.4 and newer) supports
|
||||
resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table
|
||||
size exceeds a pre-defined value does not apply to those machine versions.
|
||||
Add a check limiting the warning message to only those machines that does not
|
||||
support expandable ram blocks (that is, i440fx machines with version 2.2
|
||||
and older).
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
|
||||
---
|
||||
hw/i386/acpi-build.c | 6 ++++--
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 9 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
|
||||
index ec857a117e..9bc4d8a981 100644
|
||||
--- a/hw/i386/acpi-build.c
|
||||
+++ b/hw/i386/acpi-build.c
|
||||
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
int legacy_table_size =
|
||||
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
|
||||
ACPI_BUILD_ALIGN_SIZE);
|
||||
- if (tables_blob->len > legacy_table_size) {
|
||||
+ if ((tables_blob->len > legacy_table_size) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
g_array_set_size(tables_blob, legacy_table_size);
|
||||
} else {
|
||||
/* Make sure we have a buffer in case we need to resize the tables. */
|
||||
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
|
||||
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index f216922cee..7db5a2348f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
pcmc->pvh_enabled = true;
|
||||
pcmc->kvmclock_create_always = true;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
assert(!mc->get_hotplug_handler);
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index fc704d783f..4d5880e249 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
|
||||
pcmc->rsdp_in_ram = false;
|
||||
+ pcmc->resizable_acpi_blob = false;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index d218ad1628..2f514d13d8 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -130,6 +130,9 @@ struct PCMachineClass {
|
||||
|
||||
/* create kvmclock device even when KVM PV features are not exposed */
|
||||
bool kvmclock_create_always;
|
||||
+
|
||||
+ /* resizable acpi blob compat */
|
||||
+ bool resizable_acpi_blob;
|
||||
};
|
||||
|
||||
#define TYPE_PC_MACHINE "generic-pc-machine"
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,42 @@
|
||||
From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:18 +0800
|
||||
Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
|
||||
index 3ada335a24..660f49db49 100644
|
||||
--- a/hw/arm/Kconfig
|
||||
+++ b/hw/arm/Kconfig
|
||||
@@ -8,6 +8,7 @@ config ARM_VIRT
|
||||
imply TPM_TIS_SYSBUS
|
||||
imply TPM_TIS_I2C
|
||||
imply NVDIMM
|
||||
+ imply IOMMUFD
|
||||
select ARM_GIC
|
||||
select ACPI
|
||||
select ARM_SMMUV3
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
|
||||
There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'.
|
||||
Both of them are required to follow cluster-NUMA-node boundary. To
|
||||
enable the validation to warn about the irregular configuration where
|
||||
multiple CPUs in one cluster have been associated with different NUMA
|
||||
nodes.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/sbsa-ref.c | 2 ++
|
||||
hw/arm/virt.c | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
|
||||
index 0b93558dde..efb380e7c8 100644
|
||||
--- a/hw/arm/sbsa-ref.c
|
||||
+++ b/hw/arm/sbsa-ref.c
|
||||
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
|
||||
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
|
||||
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
}
|
||||
|
||||
static const TypeInfo sbsa_ref_info = {
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 9be53e9355..df6a0231bc 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,166 +0,0 @@
|
||||
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Tue, 25 Jul 2023 10:56:51 +0100
|
||||
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
The implementation of the SMMUv3 has multiple places where it reads a
|
||||
data structure from the guest and directly operates on it without
|
||||
doing a guest-to-host endianness conversion. Since all SMMU data
|
||||
structures are little-endian, this means that the SMMU doesn't work
|
||||
on a big-endian host. In particular, this causes the Avocado test
|
||||
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
|
||||
to fail on an s390x host.
|
||||
|
||||
Add appropriate byte-swapping on reads and writes of guest in-memory
|
||||
data structures so that the device works correctly on big-endian
|
||||
hosts.
|
||||
|
||||
As part of this we constrain queue_read() to operate only on Cmd
|
||||
structs and queue_write() on Evt structs, because in practice these
|
||||
are the only data structures the two functions are used with, and we
|
||||
need to know what the data structure is to be able to byte-swap its
|
||||
parts correctly.
|
||||
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
|
||||
Cc: qemu-stable@nongnu.org
|
||||
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/smmu-common.c | 3 +--
|
||||
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
|
||||
2 files changed, 32 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
|
||||
index e7f1c1f219..daa02ce798 100644
|
||||
--- a/hw/arm/smmu-common.c
|
||||
+++ b/hw/arm/smmu-common.c
|
||||
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
|
||||
dma_addr_t addr = baseaddr + index * sizeof(*pte);
|
||||
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
if (ret != MEMTX_OK) {
|
||||
info->type = SMMU_PTW_ERR_WALK_EABT;
|
||||
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
|
||||
index 270c80b665..cfb56725a6 100644
|
||||
--- a/hw/arm/smmuv3.c
|
||||
+++ b/hw/arm/smmuv3.c
|
||||
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
|
||||
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
|
||||
}
|
||||
|
||||
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
|
||||
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
|
||||
{
|
||||
dma_addr_t addr = Q_CONS_ENTRY(q);
|
||||
+ MemTxResult ret;
|
||||
+ int i;
|
||||
|
||||
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
|
||||
+ MEMTXATTRS_UNSPECIFIED);
|
||||
+ if (ret != MEMTX_OK) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
|
||||
+ le32_to_cpus(&cmd->word[i]);
|
||||
+ }
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-static MemTxResult queue_write(SMMUQueue *q, void *data)
|
||||
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
|
||||
{
|
||||
dma_addr_t addr = Q_PROD_ENTRY(q);
|
||||
MemTxResult ret;
|
||||
+ Evt evt = *evt_in;
|
||||
+ int i;
|
||||
|
||||
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
|
||||
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
|
||||
+ cpu_to_le32s(&evt.word[i]);
|
||||
+ }
|
||||
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
|
||||
MEMTXATTRS_UNSPECIFIED);
|
||||
if (ret != MEMTX_OK) {
|
||||
return ret;
|
||||
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
|
||||
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
SMMUEventInfo *event)
|
||||
{
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_ste(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
|
||||
}
|
||||
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
CD *buf, SMMUEventInfo *event)
|
||||
{
|
||||
dma_addr_t addr = STE_CTXPTR(ste);
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_cd(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
return -EINVAL;
|
||||
}
|
||||
if (s->features & SMMU_FEATURE_2LVL_STE) {
|
||||
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
|
||||
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
|
||||
dma_addr_t l1ptr, l2ptr;
|
||||
STEDesc l1std;
|
||||
|
||||
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
event->u.f_ste_fetch.addr = l1ptr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
|
||||
+ le32_to_cpus(&l1std.word[i]);
|
||||
+ }
|
||||
|
||||
span = L1STD_SPAN(&l1std);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,88 @@
|
||||
From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 9 Jan 2024 11:36:42 +1000
|
||||
Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory
|
||||
regions
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions
|
||||
RH-Jira: RHEL-19738
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4
|
||||
|
||||
Upstream: RHEL-only
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352
|
||||
|
||||
There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI
|
||||
ECAM and PCI MMIO. Each of them has a property introduced by upstream
|
||||
commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory
|
||||
regions") so that the corresponding high memory region can be disabled.
|
||||
|
||||
It's notable that another property ("compact-highmem") was introduced by
|
||||
upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property")
|
||||
so that the compact high memory region layout during assignment can be
|
||||
disabled, compatible to the old machine types. However, we don't have
|
||||
the compatibility issue since the compact high memory region layout is
|
||||
always kept as disabled until RHEL9.2.0 machine type and onwards.
|
||||
|
||||
Expose those 3 properties: "highmem-redists", "highmem-ecam" and
|
||||
"highmem-mmio". The property "compact-highmem" is kept as hidden.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 5cab00b4cd..60f117f0d2 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_compact = value;
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_mmio = value;
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable using "
|
||||
"physical address space above 32 bits");
|
||||
|
||||
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||
+ virt_get_highmem_redists,
|
||||
+ virt_set_highmem_redists);
|
||||
+ object_class_property_set_description(oc, "highmem-redists",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for GICv3 or GICv4 "
|
||||
+ "redistributor");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||
+ virt_get_highmem_ecam,
|
||||
+ virt_set_highmem_ecam);
|
||||
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI ECAM");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||
+ virt_get_highmem_mmio,
|
||||
+ virt_set_highmem_mmio);
|
||||
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI MMIO");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,132 @@
|
||||
From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Wed, 3 Jan 2024 05:57:38 -0500
|
||||
Subject: [PATCH] hw/arm/virt: Fix compats
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 209: hw/arm/virt: Fix compats
|
||||
RH-Jira: RHEL-17168
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
arm_rhel_compat is not added for virt-rhel9.4.0 machine causing
|
||||
the efi-virtio.rom to be looked for when instantiating a virtio-net-pci
|
||||
device and it won't be found since not shipped on ARM. This is a
|
||||
regression compared to 9.2.
|
||||
|
||||
Actually we do not need any rom file for any virtio-net-pci variant
|
||||
because edk2 already brings the functionality. So for 9.4 onwards, we
|
||||
want to set romfiles to "" for all of them.
|
||||
|
||||
However at the moment we apply arm_rhel_compat from the latest
|
||||
rhel*_virt_options(). This is not aligned with the generic compat
|
||||
usage which sets compats for a given machine type to accommodate for
|
||||
changes that occurred after its advent. Here we are somehow abusing
|
||||
the compat infra to set general driver options that should apply for
|
||||
all machines. On top of that this is really error prone and we have
|
||||
forgotten to add arm_rhel_compat several times in the past.
|
||||
|
||||
So let's introduce set_arm_rhel_compat() being called before any
|
||||
*virt_options in the non abstract machine class. That way the setting
|
||||
will apply to any machine type without any need to add it in any
|
||||
future machine types.
|
||||
|
||||
For < 9.4 machines we don't really care keeping non void romfiles
|
||||
for transitional and non transitional devices because anyway this was
|
||||
not working. So let's keep things simple and apply the new defaults for
|
||||
all RHEL9 machine types.
|
||||
|
||||
Finally, to follow the generic pattern we should set hw_compat_rhel_9_0
|
||||
in 9.0 machine as it is done on x86 or ccw. This has no consequence on
|
||||
aarch64 because it only contains x86 stuff but that helps understanding
|
||||
the consistency.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 43 +++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 29 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 0b17c94ad7..5cab00b4cd 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -111,11 +111,39 @@
|
||||
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
+/*
|
||||
+ * This variable is for changes to properties that are RHEL specific,
|
||||
+ * different to the current upstream and to be applied to the latest
|
||||
+ * machine type. They may be overridden by older machine compats.
|
||||
+ *
|
||||
+ * virtio-net-pci variant romfiles are not needed because edk2 does
|
||||
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
|
||||
+ * on rhel/aarch64.
|
||||
+ */
|
||||
+GlobalProperty arm_rhel_compat[] = {
|
||||
+ {"virtio-net-pci", "romfile", "" },
|
||||
+ {"virtio-net-pci-transitional", "romfile", "" },
|
||||
+ {"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+};
|
||||
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
+
|
||||
+/*
|
||||
+ * This cannot be called from the rhel_virt_class_init() because
|
||||
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
|
||||
+ * only is called on virt-rhelm.n.s non abstract class init.
|
||||
+ */
|
||||
+static void arm_rhel_compat_set(MachineClass *mc)
|
||||
+{
|
||||
+ compat_props_add(mc->compat_props, arm_rhel_compat,
|
||||
+ arm_rhel_compat_len);
|
||||
+}
|
||||
+
|
||||
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
|
||||
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
{ \
|
||||
MachineClass *mc = MACHINE_CLASS(oc); \
|
||||
+ arm_rhel_compat_set(mc); \
|
||||
rhel##m##n##s##_virt_options(mc); \
|
||||
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
|
||||
if (latest) { \
|
||||
@@ -139,19 +167,6 @@
|
||||
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
|
||||
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
|
||||
|
||||
-/* This variable is for changes to properties that are RHEL specific,
|
||||
- * different to the current upstream and to be applied to the latest
|
||||
- * machine type.
|
||||
- */
|
||||
-GlobalProperty arm_rhel_compat[] = {
|
||||
- {
|
||||
- .driver = "virtio-net-pci",
|
||||
- .property = "romfile",
|
||||
- .value = "",
|
||||
- },
|
||||
-};
|
||||
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
-
|
||||
/* Number of external interrupt lines to configure the GIC with */
|
||||
#define NUM_IRQS 256
|
||||
|
||||
@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
rhel940_virt_options(mc);
|
||||
|
||||
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc)
|
||||
rhel920_virt_options(mc);
|
||||
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
|
||||
RHEL machines
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
Upstream Status: RHEL only
|
||||
|
||||
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
|
||||
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
|
||||
A warning message will be printed if the boundary is broken.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index df6a0231bc..faf68488d5 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,41 @@
|
||||
From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001
|
||||
From: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Mon, 12 Feb 2024 10:37:54 +0100
|
||||
Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
|
||||
RH-Author: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
RH-Jira: RHEL-24988
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-24988
|
||||
Upstream: RHEL only
|
||||
|
||||
We do not plan to support any machine types prior to 9.4.0; leave them
|
||||
in, but mark as deprecated.
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 60f117f0d2..943c563391 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
+
|
||||
+ /* RHEL 9.4 is the first supported release */
|
||||
+ mc->deprecation_reason =
|
||||
+ "machine types for versions prior to 9.4 are deprecated";
|
||||
}
|
||||
DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,41 @@
|
||||
From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:20 +0800
|
||||
Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/i386/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
|
||||
index 55850791df..a1846be6f7 100644
|
||||
--- a/hw/i386/Kconfig
|
||||
+++ b/hw/i386/Kconfig
|
||||
@@ -95,6 +95,7 @@ config Q35
|
||||
imply E1000E_PCI_EXPRESS
|
||||
imply VMPORT
|
||||
imply VMMOUSE
|
||||
+ imply IOMMUFD
|
||||
select PC_PCI
|
||||
select PC_ACPI
|
||||
select PCI_EXPRESS_Q35
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 15:34:45 -0300
|
||||
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
|
||||
<= pc-q35-rhel9.2.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
|
||||
RH-Bugzilla: 2223691
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that turns off the property
|
||||
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
|
||||
live migrations to RHEL9.2 to happen successfully.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
|
||||
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5ea52317b9..6f5117669d 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,118 +0,0 @@
|
||||
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 2 May 2023 21:27:02 -0300
|
||||
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
|
||||
RH-Bugzilla: 2189423
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
Since its implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
|
||||
set for machine types < 8.0 will cause migration to fail if the target
|
||||
QEMU version is < 8.0.0 :
|
||||
|
||||
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
|
||||
qemu-system-x86_64: Failed to load PCIDevice:config
|
||||
qemu-system-x86_64: Failed to load e1000e:parent_obj
|
||||
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
|
||||
qemu-system-x86_64: load of migration failed: Invalid argument
|
||||
|
||||
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
|
||||
with this cmdline:
|
||||
|
||||
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
|
||||
|
||||
In order to fix this, property x-pcie-err-unc-mask was introduced to
|
||||
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
|
||||
default, but is disabled if machine type <= 7.2.
|
||||
|
||||
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
|
||||
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
|
||||
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 1 +
|
||||
hw/pci/pci.c | 2 ++
|
||||
hw/pci/pcie_aer.c | 11 +++++++----
|
||||
include/hw/pci/pci.h | 2 ++
|
||||
4 files changed, 12 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 0e0120b7f2..c28702b690 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
|
||||
{ "e1000e", "migrate-timadj", "off" },
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
|
||||
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
|
||||
index def5000e7b..8ad4349e96 100644
|
||||
--- a/hw/pci/pci.c
|
||||
+++ b/hw/pci/pci.c
|
||||
@@ -79,6 +79,8 @@ static Property pci_props[] = {
|
||||
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
|
||||
failover_pair_id),
|
||||
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
|
||||
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
|
||||
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
|
||||
DEFINE_PROP_END_OF_LIST()
|
||||
};
|
||||
|
||||
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
|
||||
index 103667c368..374d593ead 100644
|
||||
--- a/hw/pci/pcie_aer.c
|
||||
+++ b/hw/pci/pcie_aer.c
|
||||
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
|
||||
|
||||
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
|
||||
PCI_ERR_UNC_SUPPORTED);
|
||||
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_MASK_DEFAULT);
|
||||
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_SUPPORTED);
|
||||
+
|
||||
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
|
||||
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_MASK_DEFAULT);
|
||||
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_SUPPORTED);
|
||||
+ }
|
||||
|
||||
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
|
||||
PCI_ERR_UNC_SEVERITY_DEFAULT);
|
||||
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
|
||||
index d5a40cd058..6dc6742fc4 100644
|
||||
--- a/include/hw/pci/pci.h
|
||||
+++ b/include/hw/pci/pci.h
|
||||
@@ -207,6 +207,8 @@ enum {
|
||||
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
|
||||
#define QEMU_PCIE_CXL_BITNR 10
|
||||
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
|
||||
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
|
||||
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
|
||||
};
|
||||
|
||||
typedef struct PCIINTxRoute {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,116 @@
|
||||
From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 15:03:55 +0100
|
||||
Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm)
|
||||
|
||||
When the legacy and iommufd backends were introduced, a set of common
|
||||
vfio-pci routines were exported in pci.c for both backends to use :
|
||||
|
||||
vfio_pci_pre_reset
|
||||
vfio_pci_get_pci_hot_reset_info
|
||||
vfio_pci_host_match
|
||||
vfio_pci_post_reset
|
||||
|
||||
This introduced a build failure on PPC when --without-default-devices
|
||||
is used because VFIO is always selected in ppc/Kconfig but VFIO_PCI is
|
||||
not.
|
||||
|
||||
Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the
|
||||
VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with
|
||||
CONFIG_VFIO_PCI.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/ppc/Kconfig | 2 +-
|
||||
hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
|
||||
index 56f0475a8e..44263a58c4 100644
|
||||
--- a/hw/ppc/Kconfig
|
||||
+++ b/hw/ppc/Kconfig
|
||||
@@ -3,11 +3,11 @@ config PSERIES
|
||||
imply PCI_DEVICES
|
||||
imply TEST_DEVICES
|
||||
imply VIRTIO_VGA
|
||||
+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
|
||||
select NVDIMM
|
||||
select DIMM
|
||||
select PCI
|
||||
select SPAPR_VSCSI
|
||||
- select VFIO if LINUX # needed by spapr_pci_vfio.c
|
||||
select XICS
|
||||
select XIVE
|
||||
select MSI_NONBROKEN
|
||||
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
|
||||
index d1d07bec46..76b2a3487b 100644
|
||||
--- a/hw/ppc/spapr_pci_vfio.c
|
||||
+++ b/hw/ppc/spapr_pci_vfio.c
|
||||
@@ -26,10 +26,12 @@
|
||||
#include "hw/pci/pci_device.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
|
||||
|
||||
/*
|
||||
* Interfaces for IBM EEH (Enhanced Error Handling)
|
||||
*/
|
||||
+#ifdef CONFIG_VFIO_PCI
|
||||
static bool vfio_eeh_container_ok(VFIOContainer *container)
|
||||
{
|
||||
/*
|
||||
@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
|
||||
return RTAS_OUT_SUCCESS;
|
||||
}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+void spapr_phb_vfio_reset(DeviceState *qdev)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
|
||||
+ unsigned int addr, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_VFIO_PCI */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,470 +0,0 @@
|
||||
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
|
||||
qemu_bh_new_guarded
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:09 2023 -0400
|
||||
|
||||
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
|
||||
|
||||
This protects devices from bh->mmio reentrancy issues.
|
||||
|
||||
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Paul Durrant <paul@xen.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/9pfs/xen-9p-backend.c | 5 ++++-
|
||||
hw/block/dataplane/virtio-blk.c | 3 ++-
|
||||
hw/block/dataplane/xen-block.c | 5 +++--
|
||||
hw/char/virtio-serial-bus.c | 3 ++-
|
||||
hw/display/qxl.c | 9 ++++++---
|
||||
hw/display/virtio-gpu.c | 6 ++++--
|
||||
hw/ide/ahci.c | 3 ++-
|
||||
hw/ide/ahci_internal.h | 1 +
|
||||
hw/ide/core.c | 4 +++-
|
||||
hw/misc/imx_rngc.c | 6 ++++--
|
||||
hw/misc/macio/mac_dbdma.c | 2 +-
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/nvme/ctrl.c | 6 ++++--
|
||||
hw/scsi/mptsas.c | 3 ++-
|
||||
hw/scsi/scsi-bus.c | 3 ++-
|
||||
hw/scsi/vmw_pvscsi.c | 3 ++-
|
||||
hw/usb/dev-uas.c | 3 ++-
|
||||
hw/usb/hcd-dwc2.c | 3 ++-
|
||||
hw/usb/hcd-ehci.c | 3 ++-
|
||||
hw/usb/hcd-uhci.c | 2 +-
|
||||
hw/usb/host-libusb.c | 6 ++++--
|
||||
hw/usb/redirect.c | 6 ++++--
|
||||
hw/usb/xen-usb.c | 3 ++-
|
||||
hw/virtio/virtio-balloon.c | 5 +++--
|
||||
hw/virtio/virtio-crypto.c | 3 ++-
|
||||
25 files changed, 66 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
|
||||
index 74f3a05f88..0e266c552b 100644
|
||||
--- a/hw/9pfs/xen-9p-backend.c
|
||||
+++ b/hw/9pfs/xen-9p-backend.c
|
||||
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
|
||||
|
||||
int num_rings;
|
||||
Xen9pfsRing *rings;
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
} Xen9pfsDev;
|
||||
|
||||
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
|
||||
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
|
||||
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
|
||||
XEN_FLEX_RING_SIZE(ring_order);
|
||||
|
||||
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
|
||||
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
|
||||
+ &xen_9pdev->rings[i],
|
||||
+ &xen_9pdev->mem_reentrancy_guard);
|
||||
xen_9pdev->rings[i].out_cons = 0;
|
||||
xen_9pdev->rings[i].out_size = 0;
|
||||
xen_9pdev->rings[i].inprogress = false;
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index b28d81737e..a6202997ee 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
||||
} else {
|
||||
s->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
||||
|
||||
*dataplane = s;
|
||||
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
|
||||
index 734da42ea7..d8bc39d359 100644
|
||||
--- a/hw/block/dataplane/xen-block.c
|
||||
+++ b/hw/block/dataplane/xen-block.c
|
||||
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
|
||||
} else {
|
||||
dataplane->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
|
||||
- dataplane);
|
||||
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
|
||||
+ dataplane,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
|
||||
return dataplane;
|
||||
}
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index 7d4601cb5d..dd619f0731 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
|
||||
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
|
||||
index 80ce1e9a93..f1c0eb7dfc 100644
|
||||
--- a/hw/display/qxl.c
|
||||
+++ b/hw/display/qxl.c
|
||||
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
|
||||
|
||||
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
|
||||
|
||||
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
|
||||
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
qxl_reset_state(qxl);
|
||||
|
||||
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
|
||||
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
|
||||
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index 5e15c79b94..66ac9b6cc5 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
|
||||
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
|
||||
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
QTAILQ_INIT(&g->cmdq);
|
||||
QTAILQ_INIT(&g->fenceq);
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index 55902e1df7..4e76d6b191 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
|
||||
ahci_write_fis_d2h(ad);
|
||||
|
||||
if (ad->port_regs.cmd_issue && !ad->check_bh) {
|
||||
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
|
||||
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
|
||||
+ &ad->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(ad->check_bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
|
||||
index 303fcd7235..2480455372 100644
|
||||
--- a/hw/ide/ahci_internal.h
|
||||
+++ b/hw/ide/ahci_internal.h
|
||||
@@ -321,6 +321,7 @@ struct AHCIDevice {
|
||||
bool init_d2h_sent;
|
||||
AHCICmdHdr *cur_cmd;
|
||||
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct AHCIPCIState {
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 45d14a25e9..de48ff9f86 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
|
||||
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
|
||||
{
|
||||
IDEState *s = opaque;
|
||||
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
|
||||
TrimAIOCB *iocb;
|
||||
|
||||
/* Paired with a decrement in ide_trim_bh_cb() */
|
||||
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
|
||||
|
||||
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
|
||||
iocb->s = s;
|
||||
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
|
||||
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
iocb->ret = 0;
|
||||
iocb->qiov = qiov;
|
||||
iocb->i = -1;
|
||||
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
|
||||
index 632c03779c..082c6980ad 100644
|
||||
--- a/hw/misc/imx_rngc.c
|
||||
+++ b/hw/misc/imx_rngc.c
|
||||
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_mmio(sbd, &s->iomem);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
|
||||
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
|
||||
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void imx_rngc_reset(DeviceState *dev)
|
||||
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
|
||||
index 43bb1f56ba..80a789f32b 100644
|
||||
--- a/hw/misc/macio/mac_dbdma.c
|
||||
+++ b/hw/misc/macio/mac_dbdma.c
|
||||
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
DBDMAState *s = MAC_DBDMA(dev);
|
||||
|
||||
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 53e1c32643..447f669921 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
|
||||
virtio_net_handle_tx_bh);
|
||||
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
|
||||
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
n->vqs[index].tx_waiting = 0;
|
||||
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
|
||||
index ac24eeb5ed..e5a468975e 100644
|
||||
--- a/hw/nvme/ctrl.c
|
||||
+++ b/hw/nvme/ctrl.c
|
||||
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
|
||||
}
|
||||
|
||||
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
|
||||
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
|
||||
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
|
||||
|
||||
if (n->dbbuf_enabled) {
|
||||
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
|
||||
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
}
|
||||
}
|
||||
n->cq[cqid] = cq;
|
||||
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
|
||||
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
|
||||
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
|
||||
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
|
||||
index c485da792c..3de288b454 100644
|
||||
--- a/hw/scsi/mptsas.c
|
||||
+++ b/hw/scsi/mptsas.c
|
||||
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
}
|
||||
s->max_devices = MPTSAS_NUM_PORTS;
|
||||
|
||||
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
|
||||
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||
index c97176110c..3c20b47ad0 100644
|
||||
--- a/hw/scsi/scsi-bus.c
|
||||
+++ b/hw/scsi/scsi-bus.c
|
||||
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||
AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||
/* The reference is dropped in scsi_dma_restart_bh.*/
|
||||
object_ref(OBJECT(s));
|
||||
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(s->bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
|
||||
index fa76696855..4de34536e9 100644
|
||||
--- a/hw/scsi/vmw_pvscsi.c
|
||||
+++ b/hw/scsi/vmw_pvscsi.c
|
||||
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
|
||||
}
|
||||
|
||||
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
|
||||
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
|
||||
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
|
||||
/* override default SCSI bus hotplug-handler, with pvscsi's one */
|
||||
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
|
||||
index 88f99c05d5..f013ded91e 100644
|
||||
--- a/hw/usb/dev-uas.c
|
||||
+++ b/hw/usb/dev-uas.c
|
||||
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
QTAILQ_INIT(&uas->results);
|
||||
QTAILQ_INIT(&uas->requests);
|
||||
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
|
||||
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
|
||||
+ &d->mem_reentrancy_guard);
|
||||
|
||||
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
|
||||
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
|
||||
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
|
||||
index 8755e9cbb0..a0c4e782b2 100644
|
||||
--- a/hw/usb/hcd-dwc2.c
|
||||
+++ b/hw/usb/hcd-dwc2.c
|
||||
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
|
||||
s->fi = USB_FRMINTVL - 1;
|
||||
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
}
|
||||
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
|
||||
index d4da8dcb8d..c930c60921 100644
|
||||
--- a/hw/usb/hcd-ehci.c
|
||||
+++ b/hw/usb/hcd-ehci.c
|
||||
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
|
||||
}
|
||||
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
s->device = dev;
|
||||
|
||||
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
|
||||
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
|
||||
index 8ac1175ad2..77baaa7a6b 100644
|
||||
--- a/hw/usb/hcd-uhci.c
|
||||
+++ b/hw/usb/hcd-uhci.c
|
||||
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
|
||||
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
|
||||
}
|
||||
}
|
||||
- s->bh = qemu_bh_new(uhci_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
|
||||
s->num_ports_vmstate = NB_PORTS;
|
||||
QTAILQ_INIT(&s->queues);
|
||||
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
|
||||
index 176868d345..f500db85ab 100644
|
||||
--- a/hw/usb/host-libusb.c
|
||||
+++ b/hw/usb/host-libusb.c
|
||||
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
|
||||
static void usb_host_nodev(USBHostDevice *s)
|
||||
{
|
||||
if (!s->bh_nodev) {
|
||||
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
|
||||
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(s->bh_nodev);
|
||||
}
|
||||
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
|
||||
USBHostDevice *dev = opaque;
|
||||
|
||||
if (!dev->bh_postld) {
|
||||
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
|
||||
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(dev->bh_postld);
|
||||
dev->bh_postld_pending = true;
|
||||
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
|
||||
index fd7df599bc..39fbaaab16 100644
|
||||
--- a/hw/usb/redirect.c
|
||||
+++ b/hw/usb/redirect.c
|
||||
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
|
||||
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
|
||||
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
|
||||
|
||||
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
|
||||
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
|
||||
index 66cb3f7c24..38ee660a30 100644
|
||||
--- a/hw/usb/xen-usb.c
|
||||
+++ b/hw/usb/xen-usb.c
|
||||
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
|
||||
|
||||
QTAILQ_INIT(&usbif->req_free_q);
|
||||
QSIMPLEQ_INIT(&usbif->hotplug_q);
|
||||
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
|
||||
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static int usbback_free(struct XenLegacyDevice *xendev)
|
||||
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
|
||||
index 43092aa634..5186e831dd 100644
|
||||
--- a/hw/virtio/virtio-balloon.c
|
||||
+++ b/hw/virtio/virtio-balloon.c
|
||||
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
|
||||
precopy_add_notifier(&s->free_page_hint_notify);
|
||||
|
||||
object_ref(OBJECT(s->iothread));
|
||||
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
|
||||
- virtio_ballloon_get_free_page_hints, s);
|
||||
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
|
||||
+ virtio_ballloon_get_free_page_hints, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 802e1b9659..2fe804510f 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
|
||||
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,141 +0,0 @@
|
||||
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Mon, 29 May 2023 14:21:08 -0400
|
||||
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
|
||||
controller (CVE-2023-0330)
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit b987718bbb1d0eabf95499b976212dd5f0120d75
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon May 22 11:10:11 2023 +0200
|
||||
|
||||
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
|
||||
|
||||
We cannot use the generic reentrancy guard in the LSI code, so
|
||||
we have to manually prevent endless reentrancy here. The problematic
|
||||
lsi_execute_script() function has already a way to detect whether
|
||||
too many instructions have been executed - we just have to slightly
|
||||
change the logic here that it also takes into account if the function
|
||||
has been called too often in a reentrant way.
|
||||
|
||||
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
|
||||
patch by Mauro Matteo Cascella.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
|
||||
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 50 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 048436352b..f7d45b0b20 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
+ static int reentrancy_level;
|
||||
+
|
||||
+ reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- if (++insn_processed > LSI_MAX_INSN) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
+ /*
|
||||
+ * Some windows drivers make the device spin waiting for a memory location
|
||||
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
+ * assume this is the case and force an unexpected device disconnect. This
|
||||
+ * is apparently sufficient to beat the drivers into submission.
|
||||
+ *
|
||||
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
+ * trigger itself again and again. Avoid this problem by stopping after
|
||||
+ * being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
+ * which should be enough for all valid use cases).
|
||||
+ */
|
||||
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1596,6 +1605,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
+
|
||||
+ reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
index 2012bd54b7..1b55928b9f 100644
|
||||
--- a/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
@@ -8,6 +8,36 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "libqtest.h"
|
||||
|
||||
+/*
|
||||
+ * This used to trigger a DMA reentrancy issue
|
||||
+ * leading to memory corruption bugs like stack
|
||||
+ * overflow or use-after-free
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
+ */
|
||||
+static void test_lsi_dma_reentrancy(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 512M -nodefaults "
|
||||
+ "-blockdev driver=null-co,node-name=null0 "
|
||||
+ "-device lsi53c810 -device scsi-cd,drive=null0");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
|
||||
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
|
||||
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
|
||||
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
|
||||
+ qtest_writel(s, 0xff000000, 0xc0000024);
|
||||
+ qtest_writel(s, 0xff000114, 0x00000080);
|
||||
+ qtest_writel(s, 0xff00012c, 0xff000000);
|
||||
+ qtest_writel(s, 0xff000004, 0xff000114);
|
||||
+ qtest_writel(s, 0xff000008, 0xff100014);
|
||||
+ qtest_writel(s, 0xff10002f, 0x000000ff);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This used to trigger a UAF in lsi_do_msgout()
|
||||
* https://gitlab.com/qemu-project/qemu/-/issues/972
|
||||
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
|
||||
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
|
||||
test_lsi_do_msgout_cancel_req);
|
||||
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
|
||||
+ test_lsi_dma_reentrancy);
|
||||
+
|
||||
return g_test_run();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,73 @@
|
||||
From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Volker=20R=C3=BCmelin?= <vr_qemu@t-online.de>
|
||||
Date: Fri, 29 Dec 2023 21:38:54 +0100
|
||||
Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
introduced a global VFIODevice list, but forgot to update the list
|
||||
element field name when iterating over the new list. Change the code
|
||||
to use the correct list element field.
|
||||
|
||||
Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061
|
||||
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 0d4d8b8416..0b3352f2a9 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -73,7 +73,7 @@ bool vfio_mig_active(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration_blocker) {
|
||||
return false;
|
||||
}
|
||||
@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void)
|
||||
unsigned int device_num = 0;
|
||||
bool all_support_p2p = true;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration) {
|
||||
device_num++;
|
||||
|
||||
@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized) {
|
||||
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
|
||||
}
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized && vbasedev->needs_reset) {
|
||||
vbasedev->ops->vfio_hot_reset_multi(vbasedev);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,76 +0,0 @@
|
||||
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit 0ddcb39c9357
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Fri Jun 30 16:36:08 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
|
||||
Coverity reports a tainted scalar when traversing the capabilities
|
||||
chain (CID 1516589). In practice I've never seen a device with a
|
||||
chain so broken as to cause an issue, but it's also pretty easy to
|
||||
sanitize.
|
||||
|
||||
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index 0ed2fcd531..f4ff836805 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
.set = set_nv_gpudirect_clique_id,
|
||||
};
|
||||
|
||||
+static bool is_valid_std_cap_offset(uint8_t pos)
|
||||
+{
|
||||
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
|
||||
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
|
||||
+}
|
||||
+
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
*/
|
||||
ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
- if (ret != 1 || !tmp) {
|
||||
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
|
||||
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
d4_conflict = true;
|
||||
}
|
||||
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
- } while (tmp);
|
||||
+ } while (is_valid_std_cap_offset(tmp));
|
||||
|
||||
if (!c8_conflict) {
|
||||
pos = 0xC8;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,110 +0,0 @@
|
||||
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
|
||||
GPUDirect Cliques
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit f6b30c1984f7
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Thu Jun 8 12:05:07 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
|
||||
|
||||
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
|
||||
previously reserved for use by hypervisors to implement the GPUDirect
|
||||
Cliques capability. A revised specification provides an alternate
|
||||
location. Add a config space walk to the quirk to check for conflicts,
|
||||
allowing us to fall back to the new location or generate an error at the
|
||||
quirk setup rather than when the real conflicting capability is added
|
||||
should there be no available location.
|
||||
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index f0147a050a..0ed2fcd531 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
|
||||
* +---------------------------------+---------------------------------+
|
||||
*
|
||||
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
|
||||
+ *
|
||||
+ * Specification for Turning and later GPU architectures:
|
||||
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
|
||||
*/
|
||||
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
- int ret, pos = 0xC8;
|
||||
+ int ret, pos;
|
||||
+ bool c8_conflict = false, d4_conflict = false;
|
||||
+ uint8_t tmp;
|
||||
|
||||
if (vdev->nv_gpudirect_clique == 0xFF) {
|
||||
return 0;
|
||||
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Per the updated specification above, it's recommended to use offset
|
||||
+ * D4h for Turing and later GPU architectures due to a conflict of the
|
||||
+ * MSI-X capability at C8h. We don't know how to determine the GPU
|
||||
+ * architecture, instead we walk the capability chain to mark conflicts
|
||||
+ * and choose one or error based on the result.
|
||||
+ *
|
||||
+ * NB. Cap list head in pdev->config is already cleared, read from device.
|
||||
+ */
|
||||
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
+ vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
+ if (ret != 1 || !tmp) {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ if (tmp == 0xC8) {
|
||||
+ c8_conflict = true;
|
||||
+ } else if (tmp == 0xD4) {
|
||||
+ d4_conflict = true;
|
||||
+ }
|
||||
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
+ } while (tmp);
|
||||
+
|
||||
+ if (!c8_conflict) {
|
||||
+ pos = 0xC8;
|
||||
+ } else if (!d4_conflict) {
|
||||
+ pos = 0xD4;
|
||||
+ } else {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,62 +0,0 @@
|
||||
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 17 Jul 2023 18:21:26 +0200
|
||||
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
|
||||
virtio_iommu_handle_command()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
In the virtio_iommu_handle_command() when a PROBE request is handled,
|
||||
output_size takes a value greater than the tail size and on a subsequent
|
||||
iteration we can get a stack out-of-bounds access. Initialize the
|
||||
output_size on each iteration.
|
||||
|
||||
The issue was found with ASAN. Credits to:
|
||||
Yiming Tao(Zhejiang University)
|
||||
Gaoning Pan(Zhejiang University)
|
||||
|
||||
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
|
||||
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio-iommu.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
|
||||
index 421e2a944f..17ce630200 100644
|
||||
--- a/hw/virtio/virtio-iommu.c
|
||||
+++ b/hw/virtio/virtio-iommu.c
|
||||
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
||||
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
|
||||
struct virtio_iommu_req_head head;
|
||||
struct virtio_iommu_req_tail tail = {};
|
||||
- size_t output_size = sizeof(tail), sz;
|
||||
VirtQueueElement *elem;
|
||||
unsigned int iov_cnt;
|
||||
struct iovec *iov;
|
||||
void *buf = NULL;
|
||||
+ size_t sz;
|
||||
|
||||
for (;;) {
|
||||
+ size_t output_size = sizeof(tail);
|
||||
+
|
||||
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
|
||||
if (!elem) {
|
||||
return;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,52 +0,0 @@
|
||||
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:29:15 -0400
|
||||
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
|
||||
0x8000001F is set
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:30 2022 -0500
|
||||
|
||||
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
|
||||
|
||||
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
|
||||
associated with fields being set.
|
||||
|
||||
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 839706b430..4ac3046313 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
- *ebx = sev_get_cbit_position();
|
||||
- *ebx |= sev_get_reduced_phys_bits() << 6;
|
||||
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,77 +0,0 @@
|
||||
From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:22:55 -0400
|
||||
Subject: [PATCH 12/14] i386/sev: Update checks and information related to
|
||||
reduced-phys-bits
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:29 2022 -0500
|
||||
|
||||
i386/sev: Update checks and information related to reduced-phys-bits
|
||||
|
||||
The value of the reduced-phys-bits parameter is propagated to the CPUID
|
||||
information exposed to the guest. Update the current validation check to
|
||||
account for the size of the CPUID field (6-bits), ensuring the value is
|
||||
in the range of 1 to 63.
|
||||
|
||||
Maintain backward compatibility, to an extent, by allowing a value greater
|
||||
than 1 (so that the previously documented value of 5 still works), but not
|
||||
allowing anything over 63.
|
||||
|
||||
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 17 ++++++++++++++---
|
||||
1 file changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 859e06f6ad..fe2144c038 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
host_cbitpos = ebx & 0x3f;
|
||||
|
||||
+ /*
|
||||
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
|
||||
+ * register of CPUID 0x8000001F. No need to verify the range as the
|
||||
+ * comparison against the host value accomplishes that.
|
||||
+ */
|
||||
if (host_cbitpos != sev->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev->cbitpos);
|
||||
goto err;
|
||||
}
|
||||
|
||||
- if (sev->reduced_phys_bits < 1) {
|
||||
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
|
||||
- " requested '%d'", __func__, sev->reduced_phys_bits);
|
||||
+ /*
|
||||
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
|
||||
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
|
||||
+ * is in the range of 1 to 63.
|
||||
+ */
|
||||
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
|
||||
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
+ " it should be in the range of 1 to 63, requested '%d'",
|
||||
+ __func__, sev->reduced_phys_bits);
|
||||
goto err;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,54 @@
|
||||
From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 21:08:59 +0100
|
||||
Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init()
|
||||
mis-assignment
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions
|
||||
RH-Jira: RHEL-21570
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21570
|
||||
|
||||
commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503
|
||||
Author: Elen Avan <elen.avan@bk.ru>
|
||||
Date: Fri Dec 22 22:17:21 2023 +0300
|
||||
|
||||
include/ui/rect.h: fix qemu_rect_init() mis-assignment
|
||||
|
||||
Signed-off-by: Elen Avan <elen.avan@bk.ru>
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050
|
||||
Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test"
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
include/ui/rect.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/ui/rect.h b/include/ui/rect.h
|
||||
index 94898f92d0..68f05d78a8 100644
|
||||
--- a/include/ui/rect.h
|
||||
+++ b/include/ui/rect.h
|
||||
@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect,
|
||||
uint16_t width, uint16_t height)
|
||||
{
|
||||
rect->x = x;
|
||||
- rect->y = x;
|
||||
+ rect->y = y;
|
||||
rect->width = width;
|
||||
rect->height = height;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,144 +0,0 @@
|
||||
From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:34 +0200
|
||||
Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This test exercises graph locking, draining, and graph modifications
|
||||
with AioContext switches a lot. Amongst others, it serves as a
|
||||
regression test for bdrv_graph_wrlock() deadlocking because it is called
|
||||
with a locked AioContext and for AioContext handling in the NBD server.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-4-kwolf@redhat.com>
|
||||
Tested-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/iotests.py | 4 ++
|
||||
.../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++--
|
||||
.../tests/graph-changes-while-io.out | 4 +-
|
||||
3 files changed, 58 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
|
||||
index 3e82c634cf..7073579a7d 100644
|
||||
--- a/tests/qemu-iotests/iotests.py
|
||||
+++ b/tests/qemu-iotests/iotests.py
|
||||
@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
|
||||
assert self._qmp is not None
|
||||
return self._qmp.cmd(cmd, args)
|
||||
|
||||
+ def get_qmp(self) -> QEMUMonitorProtocol:
|
||||
+ assert self._qmp is not None
|
||||
+ return self._qmp
|
||||
+
|
||||
def stop(self, kill_signal=15):
|
||||
self._p.send_signal(kill_signal)
|
||||
self._p.wait()
|
||||
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
index 7664f33689..750e7d4d38 100755
|
||||
--- a/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
|
||||
@@ -22,19 +22,19 @@
|
||||
import os
|
||||
from threading import Thread
|
||||
import iotests
|
||||
-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
|
||||
- QemuStorageDaemon
|
||||
+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
|
||||
+ QMPTestCase, QemuStorageDaemon
|
||||
|
||||
|
||||
top = os.path.join(iotests.test_dir, 'top.img')
|
||||
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
|
||||
|
||||
|
||||
-def do_qemu_img_bench() -> None:
|
||||
+def do_qemu_img_bench(count: int = 2000000) -> None:
|
||||
"""
|
||||
Do some I/O requests on `nbd_sock`.
|
||||
"""
|
||||
- qemu_img('bench', '-f', 'raw', '-c', '2000000',
|
||||
+ qemu_img('bench', '-f', 'raw', '-c', str(count),
|
||||
f'nbd+unix:///node0?socket={nbd_sock}')
|
||||
|
||||
|
||||
@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):
|
||||
|
||||
bench_thr.join()
|
||||
|
||||
+ def test_commit_while_io(self) -> None:
|
||||
+ # Run qemu-img bench in the background
|
||||
+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
|
||||
+ bench_thr.start()
|
||||
+
|
||||
+ qemu_io('-c', 'write 0 64k', top)
|
||||
+ qemu_io('-c', 'write 128k 64k', top)
|
||||
+
|
||||
+ result = self.qsd.qmp('blockdev-add', {
|
||||
+ 'driver': imgfmt,
|
||||
+ 'node-name': 'overlay',
|
||||
+ 'backing': None,
|
||||
+ 'file': {
|
||||
+ 'driver': 'file',
|
||||
+ 'filename': top
|
||||
+ }
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ result = self.qsd.qmp('blockdev-snapshot', {
|
||||
+ 'node': 'node0',
|
||||
+ 'overlay': 'overlay',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ # While qemu-img bench is running, repeatedly commit overlay to node0
|
||||
+ while bench_thr.is_alive():
|
||||
+ result = self.qsd.qmp('block-commit', {
|
||||
+ 'job-id': 'job0',
|
||||
+ 'device': 'overlay',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ result = self.qsd.qmp('block-job-cancel', {
|
||||
+ 'device': 'job0',
|
||||
+ })
|
||||
+ self.assert_qmp(result, 'return', {})
|
||||
+
|
||||
+ cancelled = False
|
||||
+ while not cancelled:
|
||||
+ for event in self.qsd.get_qmp().get_events(wait=10.0):
|
||||
+ if event['event'] != 'JOB_STATUS_CHANGE':
|
||||
+ continue
|
||||
+ if event['data']['status'] == 'null':
|
||||
+ cancelled = True
|
||||
+
|
||||
+ bench_thr.join()
|
||||
+
|
||||
if __name__ == '__main__':
|
||||
# Format must support raw backing files
|
||||
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
|
||||
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
index ae1213e6f8..fbc63e62f8 100644
|
||||
--- a/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
|
||||
@@ -1,5 +1,5 @@
|
||||
-.
|
||||
+..
|
||||
----------------------------------------------------------------------
|
||||
-Ran 1 tests
|
||||
+Ran 2 tests
|
||||
|
||||
OK
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,132 +0,0 @@
|
||||
From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Tue, 9 May 2023 15:41:33 +0200
|
||||
Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This tests that trying to resize an image with QMP block_resize doesn't
|
||||
hang or otherwise fail when the image is attached to a device running in
|
||||
an iothread.
|
||||
|
||||
This is a regression test for the recent fix that changed
|
||||
qmp_block_resize, which is a coroutine based QMP handler, to avoid
|
||||
calling no_coroutine_fns directly.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230509134133.373408-1-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++
|
||||
tests/qemu-iotests/tests/iothreads-resize.out | 11 +++
|
||||
2 files changed, 82 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iothreads-resize
|
||||
create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize
|
||||
new file mode 100755
|
||||
index 0000000000..36e4598c62
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-resize
|
||||
@@ -0,0 +1,71 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw auto quick
|
||||
+#
|
||||
+# Test resizing an image that is attached to a separate iothread
|
||||
+#
|
||||
+# Copyright (C) 2023 Red Hat, Inc.
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+# creator
|
||||
+owner=kwolf@redhat.com
|
||||
+
|
||||
+seq=`basename $0`
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+# Resizing images is only supported by a few block drivers
|
||||
+_supported_fmt raw qcow2 qed
|
||||
+_supported_proto file
|
||||
+_require_devices virtio-scsi-pci
|
||||
+
|
||||
+size=64M
|
||||
+_make_test_img $size
|
||||
+
|
||||
+qmp() {
|
||||
+cat <<EOF
|
||||
+{"execute":"qmp_capabilities"}
|
||||
+{'execute': 'block_resize',
|
||||
+ 'arguments': {'node-name': 'img', 'size': 134217728}}
|
||||
+{"execute":"quit"}
|
||||
+EOF
|
||||
+}
|
||||
+
|
||||
+qmp | $QEMU -S -display none \
|
||||
+ -drive if=none,format=$IMGFMT,file="$TEST_IMG",node-name=img \
|
||||
+ -object iothread,id=t0 \
|
||||
+ -device virtio-scsi-pci,iothread=t0 \
|
||||
+ -device scsi-hd,drive=none0 \
|
||||
+ -qmp stdio \
|
||||
+ | _filter_qmp
|
||||
+
|
||||
+_img_info | _filter_img_info
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out
|
||||
new file mode 100644
|
||||
index 0000000000..2ca5a9d964
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-resize.out
|
||||
@@ -0,0 +1,11 @@
|
||||
+QA output created by iothreads-resize
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
|
||||
+QMP_VERSION
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"return": {}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
|
||||
+image: TEST_DIR/t.IMGFMT
|
||||
+file format: IMGFMT
|
||||
+virtual size: 128 MiB (134217728 bytes)
|
||||
+*** done
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 21d1d93abd48f613c18df7dacd986693da774175 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 11 May 2023 13:03:22 +0200
|
||||
Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not
|
||||
deprecated in RHEL
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that is necessary because the default
|
||||
CPU in RHEL is marked as deprecated. This makes test cases fail due to
|
||||
the warning in the output:
|
||||
|
||||
qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'
|
||||
|
||||
Fixes: 318178778db60b6475d1484509bee136317156d3
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/testenv.py | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
|
||||
index 9a37ad9152..963514aab3 100644
|
||||
--- a/tests/qemu-iotests/testenv.py
|
||||
+++ b/tests/qemu-iotests/testenv.py
|
||||
@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str,
|
||||
if self.qemu_prog.endswith(f'qemu-system-{suffix}'):
|
||||
self.qemu_options += f' -machine {machine}'
|
||||
|
||||
+ if self.qemu_prog.endswith('qemu-system-x86_64'):
|
||||
+ self.qemu_options += ' -cpu Nehalem'
|
||||
+
|
||||
# QEMU_DEFAULT_MACHINE
|
||||
self.qemu_default_machine = get_default_machine(self.qemu_prog)
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,49 @@
|
||||
From a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 18 Jan 2024 09:48:21 -0500
|
||||
Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids()
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
|
||||
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Add a filter function for QMP responses that contain QEMU's
|
||||
automatically generated node ids. The ids change between runs and must
|
||||
be masked in the reference output.
|
||||
|
||||
The next commit will use this new function.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240118144823.1497953-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/iotests.py | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
|
||||
index e5c5798c71..ea48af4a7b 100644
|
||||
--- a/tests/qemu-iotests/iotests.py
|
||||
+++ b/tests/qemu-iotests/iotests.py
|
||||
@@ -651,6 +651,13 @@ def _filter(_key, value):
|
||||
def filter_generated_node_ids(msg):
|
||||
return re.sub("#block[0-9]+", "NODE_NAME", msg)
|
||||
|
||||
+def filter_qmp_generated_node_ids(qmsg):
|
||||
+ def _filter(_key, value):
|
||||
+ if is_str(value):
|
||||
+ return filter_generated_node_ids(value)
|
||||
+ return value
|
||||
+ return filter_qmp(qmsg, _filter)
|
||||
+
|
||||
def filter_img_info(output: str, filename: str,
|
||||
drop_child_info: bool = True) -> str:
|
||||
lines = []
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,49 @@
|
||||
From 453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 25 Jan 2024 16:21:50 +0100
|
||||
Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
|
||||
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but
|
||||
in older versions it's not. We really have to catch asyncio.TimeoutError
|
||||
here, otherwise a slow test run will fail (as has happened multiple
|
||||
times on CI recently).
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20240125152150.42389-1-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iothreads-stream | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream
|
||||
index 503f221f16..231195b5e8 100755
|
||||
--- a/tests/qemu-iotests/tests/iothreads-stream
|
||||
+++ b/tests/qemu-iotests/tests/iothreads-stream
|
||||
@@ -18,6 +18,7 @@
|
||||
#
|
||||
# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
+import asyncio
|
||||
import iotests
|
||||
|
||||
iotests.script_initialize(supported_fmts=['qcow2'],
|
||||
@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \
|
||||
# The test is done once both jobs are gone
|
||||
if finished == 2:
|
||||
break
|
||||
- except TimeoutError:
|
||||
+ except asyncio.TimeoutError:
|
||||
pass
|
||||
vm.cmd('query-jobs')
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,186 +0,0 @@
|
||||
From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:18 +0200
|
||||
Subject: [PATCH 4/9] iotests/iov-padding: New test
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Test that even vectored IO requests with 1024 vector elements that are
|
||||
not aligned to the device's request alignment will succeed.
|
||||
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
|
||||
(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
|
||||
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
|
||||
2 files changed, 144 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iov-padding
|
||||
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
|
||||
new file mode 100755
|
||||
index 0000000000..b9604900c7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding
|
||||
@@ -0,0 +1,85 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw quick
|
||||
+#
|
||||
+# Check the interaction of request padding (to fit alignment restrictions) with
|
||||
+# vectored I/O from the guest
|
||||
+#
|
||||
+# Copyright Red Hat
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+seq=$(basename $0)
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+_supported_fmt raw
|
||||
+_supported_proto file
|
||||
+
|
||||
+_make_test_img 1M
|
||||
+
|
||||
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
|
||||
+
|
||||
+# Four combinations:
|
||||
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
|
||||
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
|
||||
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
|
||||
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
|
||||
+for start_offset in 4096 512; do
|
||||
+ for last_element_length in 512 4096; do
|
||||
+ length=$((1023 * 512 + $last_element_length))
|
||||
+
|
||||
+ echo
|
||||
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
|
||||
+
|
||||
+ # Fill with data for testing
|
||||
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+ # 1023 512-byte buffers, and then one with length $last_element_length
|
||||
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "writev $cmd_params" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+
|
||||
+ # Read all patterns -- read the part we just wrote with writev twice,
|
||||
+ # once "normally", and once with a readv, so we see that that works, too
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "read -P 1 0 $start_offset" \
|
||||
+ -c "read -P 2 $start_offset $length" \
|
||||
+ -c "readv $cmd_params" \
|
||||
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+ done
|
||||
+done
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
|
||||
new file mode 100644
|
||||
index 0000000000..e07a91fac7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding.out
|
||||
@@ -0,0 +1,59 @@
|
||||
+QA output created by iov-padding
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 516608/516608 bytes at offset 531968
|
||||
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 523776/523776 bytes at offset 524800
|
||||
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+*** done
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,592 @@
|
||||
From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 18 Jan 2024 09:48:22 -0500
|
||||
Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter
|
||||
RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
The common.qemu bash functions allow tests to interact with the QMP
|
||||
monitor of a QEMU process. I spent two days trying to update 141 when
|
||||
the order of the test output changed, but found it would still fail
|
||||
occasionally because printf() and QMP events race with synchronous QMP
|
||||
communication.
|
||||
|
||||
I gave up and ported 141 to the existing Python API for QMP tests. The
|
||||
Python API is less affected by the order in which QEMU prints output
|
||||
because it does not print all QMP traffic by default.
|
||||
|
||||
The next commit changes the order in which QMP messages are received.
|
||||
Make 141 reliable first.
|
||||
|
||||
Cc: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240118144823.1497953-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/141 | 307 ++++++++++++++++---------------------
|
||||
tests/qemu-iotests/141.out | 200 ++++++------------------
|
||||
2 files changed, 176 insertions(+), 331 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
|
||||
index a37030ee17..a7d3985a02 100755
|
||||
--- a/tests/qemu-iotests/141
|
||||
+++ b/tests/qemu-iotests/141
|
||||
@@ -1,9 +1,12 @@
|
||||
-#!/usr/bin/env bash
|
||||
+#!/usr/bin/env python3
|
||||
# group: rw auto quick
|
||||
#
|
||||
# Test case for ejecting BDSs with block jobs still running on them
|
||||
#
|
||||
-# Copyright (C) 2016 Red Hat, Inc.
|
||||
+# Originally written in bash by Hanna Czenczek, ported to Python by Stefan
|
||||
+# Hajnoczi.
|
||||
+#
|
||||
+# Copyright Red Hat
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -19,177 +22,129 @@
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
-# creator
|
||||
-owner=hreitz@redhat.com
|
||||
-
|
||||
-seq="$(basename $0)"
|
||||
-echo "QA output created by $seq"
|
||||
-
|
||||
-status=1 # failure is the default!
|
||||
-
|
||||
-_cleanup()
|
||||
-{
|
||||
- _cleanup_qemu
|
||||
- _cleanup_test_img
|
||||
- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do
|
||||
- _rm_test_img "$img"
|
||||
- done
|
||||
-}
|
||||
-trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
-
|
||||
-# get standard environment, filters and checks
|
||||
-. ./common.rc
|
||||
-. ./common.filter
|
||||
-. ./common.qemu
|
||||
-
|
||||
-# Needs backing file and backing format support
|
||||
-_supported_fmt qcow2 qed
|
||||
-_supported_proto file
|
||||
-_supported_os Linux
|
||||
-
|
||||
-
|
||||
-test_blockjob()
|
||||
-{
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': '$IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': '$TEST_IMG'
|
||||
- }}}" \
|
||||
- 'return'
|
||||
-
|
||||
- # If "$2" is an event, we may or may not see it before the
|
||||
- # {"return": {}}. Therefore, filter the {"return": {}} out both
|
||||
- # here and in the next command. (Naturally, if we do not see it
|
||||
- # here, we will see it before the next command can be executed,
|
||||
- # so it will appear in the next _send_qemu_cmd's output.)
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "$1" \
|
||||
- "$2" \
|
||||
- | _filter_img_create | _filter_qmp_empty_return
|
||||
-
|
||||
- # We want this to return an error because the block job is still running
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}" \
|
||||
- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return
|
||||
-
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}" \
|
||||
- "$3"
|
||||
-
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}" \
|
||||
- 'return'
|
||||
-}
|
||||
-
|
||||
-
|
||||
-TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M
|
||||
-TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M
|
||||
-_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT
|
||||
-
|
||||
-_launch_qemu -nodefaults
|
||||
-
|
||||
-_send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'qmp_capabilities'}" \
|
||||
- 'return'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing drive-backup ==='
|
||||
-echo
|
||||
-
|
||||
-# drive-backup will not send BLOCK_JOB_READY by itself, and cancelling the job
|
||||
-# will consequently result in BLOCK_JOB_CANCELLED being emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'drive-backup',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'target': '$TEST_DIR/o.$IMGFMT',
|
||||
- 'format': '$IMGFMT',
|
||||
- 'sync': 'none'}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing drive-mirror ==='
|
||||
-echo
|
||||
-
|
||||
-# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling
|
||||
-# the job will consequently result in BLOCK_JOB_COMPLETED being emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'drive-mirror',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'target': '$TEST_DIR/o.$IMGFMT',
|
||||
- 'format': '$IMGFMT',
|
||||
- 'sync': 'none'}}" \
|
||||
- 'BLOCK_JOB_READY' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing active block-commit ==='
|
||||
-echo
|
||||
-
|
||||
-# An active block-commit will send BLOCK_JOB_READY basically immediately, and
|
||||
-# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being
|
||||
-# emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-commit',
|
||||
- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \
|
||||
- 'BLOCK_JOB_READY' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing non-active block-commit ==='
|
||||
-echo
|
||||
-
|
||||
-# Give block-commit something to work on, otherwise it would be done
|
||||
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
|
||||
-# fine without the block job still running.
|
||||
-
|
||||
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-commit',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'top': '$TEST_DIR/m.$IMGFMT',
|
||||
- 'speed': 1}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing block-stream ==='
|
||||
-echo
|
||||
-
|
||||
-# Give block-stream something to work on, otherwise it would be done
|
||||
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
|
||||
-# fine without the block job still running.
|
||||
-
|
||||
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io
|
||||
-
|
||||
-# With some data to stream (and @speed set to 1), block-stream will not complete
|
||||
-# until we send the block-job-cancel command.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-stream',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'speed': 1}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-_cleanup_qemu
|
||||
-
|
||||
-# success, all done
|
||||
-echo "*** done"
|
||||
-rm -f $seq.full
|
||||
-status=0
|
||||
+import iotests
|
||||
+
|
||||
+# Common filters to mask values that vary in the test output
|
||||
+QMP_FILTERS = [iotests.filter_qmp_testfiles, \
|
||||
+ iotests.filter_qmp_imgfmt]
|
||||
+
|
||||
+
|
||||
+class TestCase:
|
||||
+ def __init__(self, name, vm, image_path, cancel_event):
|
||||
+ self.name = name
|
||||
+ self.vm = vm
|
||||
+ self.image_path = image_path
|
||||
+ self.cancel_event = cancel_event
|
||||
+
|
||||
+ def __enter__(self):
|
||||
+ iotests.log(f'=== Testing {self.name} ===')
|
||||
+ self.vm.qmp_log('blockdev-add', \
|
||||
+ node_name='drv0', \
|
||||
+ driver=iotests.imgfmt, \
|
||||
+ file={'driver': 'file', 'filename': self.image_path}, \
|
||||
+ filters=QMP_FILTERS)
|
||||
+
|
||||
+ def __exit__(self, *exc_details):
|
||||
+ # This is expected to fail because the job still exists
|
||||
+ self.vm.qmp_log('blockdev-del', node_name='drv0', \
|
||||
+ filters=[iotests.filter_qmp_generated_node_ids])
|
||||
+
|
||||
+ self.vm.qmp_log('block-job-cancel', device='job0')
|
||||
+ event = self.vm.event_wait(self.cancel_event)
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # This time it succeeds
|
||||
+ self.vm.qmp_log('blockdev-del', node_name='drv0')
|
||||
+
|
||||
+ # Separate test cases in output
|
||||
+ iotests.log('')
|
||||
+
|
||||
+
|
||||
+def main() -> None:
|
||||
+ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \
|
||||
+ (bottom_path, middle_path, top_path, target_path), \
|
||||
+ iotests.VM() as vm:
|
||||
+
|
||||
+ iotests.log('Creating bottom <- middle <- top backing file chain...')
|
||||
+ IMAGE_SIZE='1M'
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE)
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
|
||||
+ '-F', iotests.imgfmt, \
|
||||
+ '-b', bottom_path, \
|
||||
+ middle_path, \
|
||||
+ IMAGE_SIZE)
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
|
||||
+ '-F', iotests.imgfmt, \
|
||||
+ '-b', middle_path, \
|
||||
+ top_path, \
|
||||
+ IMAGE_SIZE)
|
||||
+
|
||||
+ iotests.log('Starting VM...')
|
||||
+ vm.add_args('-nodefaults')
|
||||
+ vm.launch()
|
||||
+
|
||||
+ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling
|
||||
+ # the job will consequently result in BLOCK_JOB_CANCELLED being
|
||||
+ # emitted.
|
||||
+ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('drive-backup', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ target=target_path, \
|
||||
+ format=iotests.imgfmt, \
|
||||
+ sync='none', \
|
||||
+ filters=QMP_FILTERS)
|
||||
+
|
||||
+ # drive-mirror will send BLOCK_JOB_READY basically immediately, and
|
||||
+ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED
|
||||
+ # being emitted.
|
||||
+ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'):
|
||||
+ vm.qmp_log('drive-mirror', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ target=target_path, \
|
||||
+ format=iotests.imgfmt, \
|
||||
+ sync='none', \
|
||||
+ filters=QMP_FILTERS)
|
||||
+ event = vm.event_wait('BLOCK_JOB_READY')
|
||||
+ assert event is not None # silence mypy
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # An active block-commit will send BLOCK_JOB_READY basically
|
||||
+ # immediately, and cancelling the job will consequently result in
|
||||
+ # BLOCK_JOB_COMPLETED being emitted.
|
||||
+ with TestCase('active block-commit', vm, top_path, \
|
||||
+ 'BLOCK_JOB_COMPLETED'):
|
||||
+ vm.qmp_log('block-commit', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0')
|
||||
+ event = vm.event_wait('BLOCK_JOB_READY')
|
||||
+ assert event is not None # silence mypy
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # Give block-commit something to work on, otherwise it would be done
|
||||
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
|
||||
+ # work just fine without the block job still running.
|
||||
+ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}')
|
||||
+ with TestCase('non-active block-commit', vm, top_path, \
|
||||
+ 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('block-commit', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ top=middle_path, \
|
||||
+ speed=1, \
|
||||
+ filters=[iotests.filter_qmp_testfiles])
|
||||
+
|
||||
+ # Give block-stream something to work on, otherwise it would be done
|
||||
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
|
||||
+ # work just fine without the block job still running.
|
||||
+ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}')
|
||||
+ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('block-stream', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ speed=1)
|
||||
+
|
||||
+if __name__ == '__main__':
|
||||
+ iotests.script_main(main, supported_fmts=['qcow2', 'qed'],
|
||||
+ supported_protocols=['file'])
|
||||
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
|
||||
index 63203d9944..91b7ba50af 100644
|
||||
--- a/tests/qemu-iotests/141.out
|
||||
+++ b/tests/qemu-iotests/141.out
|
||||
@@ -1,179 +1,69 @@
|
||||
-QA output created by 141
|
||||
-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576
|
||||
-Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT
|
||||
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT
|
||||
-{'execute': 'qmp_capabilities'}
|
||||
-{"return": {}}
|
||||
-
|
||||
+Creating bottom <- middle <- top backing file chain...
|
||||
+Starting VM...
|
||||
=== Testing drive-backup ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'drive-backup',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'target': 'TEST_DIR/o.IMGFMT',
|
||||
-'format': 'IMGFMT',
|
||||
-'sync': 'none'}}
|
||||
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing drive-mirror ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'drive-mirror',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'target': 'TEST_DIR/o.IMGFMT',
|
||||
-'format': 'IMGFMT',
|
||||
-'sync': 'none'}}
|
||||
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
|
||||
+{"return": {}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing active block-commit ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-commit',
|
||||
-'arguments': {'job-id': 'job0', 'device': 'drv0'}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}}
|
||||
+{"return": {}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing non-active block-commit ===
|
||||
-
|
||||
-wrote 1048576/1048576 bytes at offset 0
|
||||
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-commit',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'top': 'TEST_DIR/m.IMGFMT',
|
||||
-'speed': 1}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing block-stream ===
|
||||
-
|
||||
-wrote 1048576/1048576 bytes at offset 0
|
||||
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-stream',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'speed': 1}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
-*** done
|
||||
+
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,105 @@
|
||||
From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:10 -0500
|
||||
Subject: [PATCH 093/101] job: remove outdated AioContext locking comments
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The AioContext lock no longer exists.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-14-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/qemu/job.h | 20 --------------------
|
||||
1 file changed, 20 deletions(-)
|
||||
|
||||
diff --git a/include/qemu/job.h b/include/qemu/job.h
|
||||
index e502787dd8..9ea98b5927 100644
|
||||
--- a/include/qemu/job.h
|
||||
+++ b/include/qemu/job.h
|
||||
@@ -67,8 +67,6 @@ typedef struct Job {
|
||||
|
||||
/**
|
||||
* The completion function that will be called when the job completes.
|
||||
- * Called with AioContext lock held, since many callback implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
BlockCompletionFunc *cb;
|
||||
|
||||
@@ -264,9 +262,6 @@ struct JobDriver {
|
||||
*
|
||||
* This callback will not be invoked if the job has already failed.
|
||||
* If it fails, abort and then clean will be called.
|
||||
- *
|
||||
- * Called with AioContext lock held, since many callbacs implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
int (*prepare)(Job *job);
|
||||
|
||||
@@ -277,9 +272,6 @@ struct JobDriver {
|
||||
*
|
||||
* All jobs will complete with a call to either .commit() or .abort() but
|
||||
* never both.
|
||||
- *
|
||||
- * Called with AioContext lock held, since many callback implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
void (*commit)(Job *job);
|
||||
|
||||
@@ -290,9 +282,6 @@ struct JobDriver {
|
||||
*
|
||||
* All jobs will complete with a call to either .commit() or .abort() but
|
||||
* never both.
|
||||
- *
|
||||
- * Called with AioContext lock held, since many callback implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
void (*abort)(Job *job);
|
||||
|
||||
@@ -301,9 +290,6 @@ struct JobDriver {
|
||||
* .commit() or .abort(). Regardless of which callback is invoked after
|
||||
* completion, .clean() will always be called, even if the job does not
|
||||
* belong to a transaction group.
|
||||
- *
|
||||
- * Called with AioContext lock held, since many callbacs implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
void (*clean)(Job *job);
|
||||
|
||||
@@ -318,17 +304,12 @@ struct JobDriver {
|
||||
* READY).
|
||||
* (If the callback is NULL, the job is assumed to terminate
|
||||
* without I/O.)
|
||||
- *
|
||||
- * Called with AioContext lock held, since many callback implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
bool (*cancel)(Job *job, bool force);
|
||||
|
||||
|
||||
/**
|
||||
* Called when the job is freed.
|
||||
- * Called with AioContext lock held, since many callback implementations
|
||||
- * use bdrv_* functions that require to hold the lock.
|
||||
*/
|
||||
void (*free)(Job *job);
|
||||
};
|
||||
@@ -424,7 +405,6 @@ void job_ref_locked(Job *job);
|
||||
* Release a reference that was previously acquired with job_ref_locked() or
|
||||
* job_create(). If it's the last reference to the object, it will be freed.
|
||||
*
|
||||
- * Takes AioContext lock internally to invoke a job->driver callback.
|
||||
* Called with job lock held.
|
||||
*/
|
||||
void job_unref_locked(Job *job);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,42 @@
|
||||
From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:19 +0800
|
||||
Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/s390x/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
|
||||
index 4c068d7960..26ad104485 100644
|
||||
--- a/hw/s390x/Kconfig
|
||||
+++ b/hw/s390x/Kconfig
|
||||
@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO
|
||||
imply VFIO_CCW
|
||||
imply WDT_DIAG288
|
||||
imply PCIE_DEVICES
|
||||
+ imply IOMMUFD
|
||||
select PCI_EXPRESS
|
||||
select S390_FLIC
|
||||
select S390_FLIC_KVM if KVM
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,160 +0,0 @@
|
||||
From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001
|
||||
From: Marcelo Tosatti <mtosatti@redhat.com>
|
||||
Date: Thu, 29 Jun 2023 14:48:32 -0300
|
||||
Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Marcelo Tosatti <None>
|
||||
RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption
|
||||
RH-Bugzilla: 2218644
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644
|
||||
Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d
|
||||
|
||||
A regression has been detected in latency testing of KVM guests.
|
||||
More specifically, it was observed that the cyclictest
|
||||
numbers inside of an isolated vcpu (running on isolated pcpu) are:
|
||||
|
||||
Where a maximum of 50us is acceptable.
|
||||
|
||||
The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
|
||||
per vcpu statistics, which interrupts the vcpu (and is unnecessary).
|
||||
|
||||
To fix this, open the per vcpu stats fd on vcpu initialization,
|
||||
and read from that fd from QEMU's main thread.
|
||||
|
||||
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 30 +++++++++++++++---------------
|
||||
include/hw/core/cpu.h | 1 +
|
||||
2 files changed, 16 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index cf3a88d90e..fa7ca46c66 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
"kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
|
||||
kvm_arch_vcpu_id(cpu));
|
||||
}
|
||||
+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
|
||||
+
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
|
||||
|
||||
/* Read stats header */
|
||||
kvm_stats_header = &descriptors->kvm_stats_header;
|
||||
- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
|
||||
+ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
|
||||
if (ret != sizeof(*kvm_stats_header)) {
|
||||
error_setg(errp, "KVM stats: failed to read stats header: "
|
||||
"expected %zu actual %zu",
|
||||
@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
|
||||
}
|
||||
|
||||
static void query_stats(StatsResultList **result, StatsTarget target,
|
||||
- strList *names, int stats_fd, Error **errp)
|
||||
+ strList *names, int stats_fd, CPUState *cpu,
|
||||
+ Error **errp)
|
||||
{
|
||||
struct kvm_stats_desc *kvm_stats_desc;
|
||||
struct kvm_stats_header *kvm_stats_header;
|
||||
@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
|
||||
break;
|
||||
case STATS_TARGET_VCPU:
|
||||
add_stats_entry(result, STATS_PROVIDER_KVM,
|
||||
- current_cpu->parent_obj.canonical_path,
|
||||
+ cpu->parent_obj.canonical_path,
|
||||
stats_list);
|
||||
break;
|
||||
default:
|
||||
@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
|
||||
add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
|
||||
}
|
||||
|
||||
-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
|
||||
+static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
|
||||
{
|
||||
- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
|
||||
- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
|
||||
+ int stats_fd = cpu->kvm_vcpu_stats_fd;
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (stats_fd == -1) {
|
||||
@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
|
||||
return;
|
||||
}
|
||||
query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
|
||||
- kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
|
||||
- close(stats_fd);
|
||||
+ kvm_stats_args->names, stats_fd, cpu,
|
||||
+ kvm_stats_args->errp);
|
||||
}
|
||||
|
||||
-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
|
||||
+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
|
||||
{
|
||||
- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
|
||||
- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
|
||||
+ int stats_fd = cpu->kvm_vcpu_stats_fd;
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (stats_fd == -1) {
|
||||
@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
|
||||
}
|
||||
query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
|
||||
kvm_stats_args->errp);
|
||||
- close(stats_fd);
|
||||
}
|
||||
|
||||
static void query_stats_cb(StatsResultList **result, StatsTarget target,
|
||||
@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
|
||||
error_setg_errno(errp, errno, "KVM stats: ioctl failed");
|
||||
return;
|
||||
}
|
||||
- query_stats(result, target, names, stats_fd, errp);
|
||||
+ query_stats(result, target, names, stats_fd, NULL, errp);
|
||||
close(stats_fd);
|
||||
break;
|
||||
}
|
||||
@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
|
||||
if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
|
||||
continue;
|
||||
}
|
||||
- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
|
||||
+ query_stats_vcpu(cpu, &stats_args);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
|
||||
if (first_cpu) {
|
||||
stats_args.result.schema = result;
|
||||
stats_args.errp = errp;
|
||||
- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
|
||||
+ query_stats_schema_vcpu(first_cpu, &stats_args);
|
||||
}
|
||||
}
|
||||
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
||||
index 397fd3ac68..ae96be07e7 100644
|
||||
--- a/include/hw/core/cpu.h
|
||||
+++ b/include/hw/core/cpu.h
|
||||
@@ -399,6 +399,7 @@ struct CPUState {
|
||||
struct kvm_dirty_gfn *kvm_dirty_gfns;
|
||||
uint32_t kvm_fetch_index;
|
||||
uint64_t dirty_pages;
|
||||
+ int kvm_vcpu_stats_fd;
|
||||
|
||||
/* Use by accel-block: CPU is executing an ioctl() */
|
||||
QemuLockCnt in_ioctl_lock;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 6d0589e0e6c64b888864a2bf980537be20389264
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Sat May 6 07:21:45 2023 -0400
|
||||
|
||||
loongarch: mark loongarch_ipi_iocsr re-entrnacy safe
|
||||
|
||||
loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send
|
||||
function. As such, mark these MRs re-entrancy-safe.
|
||||
|
||||
Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Song Gao <gaosong@loongson.cn>
|
||||
Message-Id: <20230506112145.3563708-1-alxndr@bu.edu>
|
||||
Signed-off-by: Song Gao <gaosong@loongson.cn>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/loongarch_ipi.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
|
||||
index aa4bf9eb74..40e98af2ce 100644
|
||||
--- a/hw/intc/loongarch_ipi.c
|
||||
+++ b/hw/intc/loongarch_ipi.c
|
||||
@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj)
|
||||
for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) {
|
||||
memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops,
|
||||
&lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48);
|
||||
+
|
||||
+ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */
|
||||
+ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]);
|
||||
|
||||
memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Mon, 29 May 2023 14:21:08 -0400
|
||||
Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO
|
||||
region, too
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue May 16 11:05:56 2023 +0200
|
||||
|
||||
lsi53c895a: disable reentrancy detection for MMIO region, too
|
||||
|
||||
While trying to use a SCSI disk on the LSI controller with an
|
||||
older version of Fedora (25), I'm getting:
|
||||
|
||||
qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34
|
||||
|
||||
and the SCSI controller is not usable. Seems like we have to
|
||||
disable the reentrancy checker for the MMIO region, too, to
|
||||
get this working again.
|
||||
|
||||
The problem can be reproduced like this:
|
||||
|
||||
./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \
|
||||
-device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \
|
||||
-drive if=none,id=d0,file=.../somedisk.qcow2 \
|
||||
-cdrom Fedora-Everything-netinst-i386-25-1.3.iso
|
||||
|
||||
Where somedisk.qcow2 is an image that contains already some partitions
|
||||
and file systems.
|
||||
|
||||
In the boot menu of Fedora, go to
|
||||
"Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell"
|
||||
|
||||
Then check "dmesg | grep -i 53c" for failure messages, and try to mount
|
||||
a partition from somedisk.qcow2.
|
||||
|
||||
Message-Id: <20230516090556.553813-1-thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index db27872963..048436352b 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
* re-entrancy guard.
|
||||
*/
|
||||
s->ram_io.disable_reentrancy_guard = true;
|
||||
+ s->mmio_io.disable_reentrancy_guard = true;
|
||||
|
||||
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
|
||||
qdev_init_gpio_out(d, &s->ext_irq, 1);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,58 +0,0 @@
|
||||
From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:10 2023 -0400
|
||||
|
||||
lsi53c895a: disable reentrancy detection for script RAM
|
||||
|
||||
As the code is designed to use the memory APIs to access the script ram,
|
||||
disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion.
|
||||
|
||||
In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion.
|
||||
|
||||
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-6-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index af93557a9a..db27872963 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s,
|
||||
"lsi-io", 256);
|
||||
|
||||
+ /*
|
||||
+ * Since we use the address-space API to interact with ram_io, disable the
|
||||
+ * re-entrancy guard.
|
||||
+ */
|
||||
+ s->ram_io.disable_reentrancy_guard = true;
|
||||
+
|
||||
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
|
||||
qdev_init_gpio_out(d, &s->ext_irq, 1);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,112 @@
|
||||
From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001
|
||||
From: David Hildenbrand <david@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 14:55:54 +0100
|
||||
Subject: [PATCH 02/22] memory-device: reintroduce memory region size check
|
||||
|
||||
RH-Author: David Hildenbrand <david@redhat.com>
|
||||
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
|
||||
RH-Jira: RHEL-20341
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-20341
|
||||
|
||||
We used to check that the memory region size is multiples of the overall
|
||||
requested address alignment for the device memory address.
|
||||
|
||||
We removed that check, because there are cases (i.e., hv-balloon) where
|
||||
devices unconditionally request an address alignment that has a very large
|
||||
alignment (i.e., 32 GiB), but the actual memory device size might not be
|
||||
multiples of that alignment.
|
||||
|
||||
However, this change:
|
||||
|
||||
(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte".
|
||||
(b) allows for DIMMs that partially cover hugetlb pages, previously
|
||||
reported in [1].
|
||||
|
||||
Both scenarios don't make any sense: we might even waste memory.
|
||||
|
||||
So let's reintroduce that check, but only check that the
|
||||
memory region size is multiples of the memory region alignment (i.e.,
|
||||
page size, huge page size), but not any additional memory device
|
||||
requirements communicated using md->get_min_alignment().
|
||||
|
||||
The following examples now fail again as expected:
|
||||
|
||||
(a) 1M with 2M THP
|
||||
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
|
||||
-object memory-backend-ram,id=mem1,size=1M \
|
||||
-device pc-dimm,id=dimm1,memdev=mem1
|
||||
-> backend memory size must be multiple of 0x200000
|
||||
|
||||
(b) 1G+1byte
|
||||
|
||||
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
|
||||
-object memory-backend-ram,id=mem1,size=1073741825B \
|
||||
-device pc-dimm,id=dimm1,memdev=mem1
|
||||
-> backend memory size must be multiple of 0x200000
|
||||
|
||||
(c) Unaligned hugetlb size (2M)
|
||||
|
||||
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
|
||||
-object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \
|
||||
-device pc-dimm,id=dimm1,memdev=mem1
|
||||
-> backend memory size must be multiple of 0x200000
|
||||
|
||||
(d) Unaligned hugetlb size (1G)
|
||||
|
||||
qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \
|
||||
-object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \
|
||||
-device pc-dimm,id=dimm1,memdev=mem1
|
||||
-> backend memory size must be multiple of 0x40000000
|
||||
|
||||
Note that this fix depends on a hv-balloon change to communicate its
|
||||
additional alignment requirements using get_min_alignment() instead of
|
||||
through the memory region.
|
||||
|
||||
[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com
|
||||
|
||||
Message-ID: <20240117135554.787344-3-david@redhat.com>
|
||||
Reported-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Reported-by: Michal Privoznik <mprivozn@redhat.com>
|
||||
Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check")
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Tested-by: Mario Casquero <mcasquer@redhat.com>
|
||||
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4)
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
---
|
||||
hw/mem/memory-device.c | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
|
||||
index a1b1af26bc..e098585cda 100644
|
||||
--- a/hw/mem/memory-device.c
|
||||
+++ b/hw/mem/memory-device.c
|
||||
@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * We always want the memory region size to be multiples of the memory
|
||||
+ * region alignment: for example, DIMMs with 1G+1byte size don't make
|
||||
+ * any sense. Note that we don't check that the size is multiples
|
||||
+ * of any additional alignment requirements the memory device might
|
||||
+ * have when it comes to the address in physical address space.
|
||||
+ */
|
||||
+ if (!QEMU_IS_ALIGNED(memory_region_size(mr),
|
||||
+ memory_region_get_alignment(mr))) {
|
||||
+ error_setg(errp, "backend memory size must be multiple of 0x%"
|
||||
+ PRIx64, memory_region_get_alignment(mr));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (legacy_align) {
|
||||
align = *legacy_align;
|
||||
} else {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,150 +0,0 @@
|
||||
From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 05/21] memory: prevent dma-reentracy issues
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
CVE: CVE-2023-0330
|
||||
|
||||
commit a2e1753b8054344f32cf94f31c6399a58794a380
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:06 2023 -0400
|
||||
|
||||
memory: prevent dma-reentracy issues
|
||||
|
||||
Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
|
||||
This flag is set/checked prior to calling a device's MemoryRegion
|
||||
handlers, and set when device code initiates DMA. The purpose of this
|
||||
flag is to prevent two types of DMA-based reentrancy issues:
|
||||
|
||||
1.) mmio -> dma -> mmio case
|
||||
2.) bh -> dma write -> mmio case
|
||||
|
||||
These issues have led to problems such as stack-exhaustion and
|
||||
use-after-frees.
|
||||
|
||||
Summary of the problem from Peter Maydell:
|
||||
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
|
||||
Resolves: CVE-2023-0330
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-2-alxndr@bu.edu>
|
||||
[thuth: Replace warn_report() with warn_report_once()]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
include/exec/memory.h | 5 +++++
|
||||
include/hw/qdev-core.h | 7 +++++++
|
||||
softmmu/memory.c | 16 ++++++++++++++++
|
||||
3 files changed, 28 insertions(+)
|
||||
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 15ade918ba..e45ce6061f 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -767,6 +767,8 @@ struct MemoryRegion {
|
||||
bool is_iommu;
|
||||
RAMBlock *ram_block;
|
||||
Object *owner;
|
||||
+ /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
|
||||
+ DeviceState *dev;
|
||||
|
||||
const MemoryRegionOps *ops;
|
||||
void *opaque;
|
||||
@@ -791,6 +793,9 @@ struct MemoryRegion {
|
||||
unsigned ioeventfd_nb;
|
||||
MemoryRegionIoeventfd *ioeventfds;
|
||||
RamDiscardManager *rdm; /* Only for RAM */
|
||||
+
|
||||
+ /* For devices designed to perform re-entrant IO into their own IO MRs */
|
||||
+ bool disable_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct IOMMUMemoryRegion {
|
||||
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
|
||||
index bd50ad5ee1..7623703943 100644
|
||||
--- a/include/hw/qdev-core.h
|
||||
+++ b/include/hw/qdev-core.h
|
||||
@@ -162,6 +162,10 @@ struct NamedClockList {
|
||||
QLIST_ENTRY(NamedClockList) node;
|
||||
};
|
||||
|
||||
+typedef struct {
|
||||
+ bool engaged_in_io;
|
||||
+} MemReentrancyGuard;
|
||||
+
|
||||
/**
|
||||
* DeviceState:
|
||||
* @realized: Indicates whether the device has been fully constructed.
|
||||
@@ -194,6 +198,9 @@ struct DeviceState {
|
||||
int alias_required_for_version;
|
||||
ResettableState reset;
|
||||
GSList *unplug_blockers;
|
||||
+
|
||||
+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct DeviceListener {
|
||||
diff --git a/softmmu/memory.c b/softmmu/memory.c
|
||||
index b1a6cae6f5..b7b3386e9d 100644
|
||||
--- a/softmmu/memory.c
|
||||
+++ b/softmmu/memory.c
|
||||
@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_size_max = 4;
|
||||
}
|
||||
|
||||
+ /* Do not allow more than one simultaneous access to a device's IO Regions */
|
||||
+ if (mr->dev && !mr->disable_reentrancy_guard &&
|
||||
+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
|
||||
+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
|
||||
+ warn_report_once("Blocked re-entrant IO on MemoryRegion: "
|
||||
+ "%s at addr: 0x%" HWADDR_PRIX,
|
||||
+ memory_region_name(mr), addr);
|
||||
+ return MEMTX_ACCESS_ERROR;
|
||||
+ }
|
||||
+ mr->dev->mem_reentrancy_guard.engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
/* FIXME: support unaligned access? */
|
||||
access_size = MAX(MIN(size, access_size_max), access_size_min);
|
||||
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
|
||||
@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_mask, attrs);
|
||||
}
|
||||
}
|
||||
+ if (mr->dev) {
|
||||
+ mr->dev->mem_reentrancy_guard.engaged_in_io = false;
|
||||
+ }
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr,
|
||||
}
|
||||
mr->name = g_strdup(name);
|
||||
mr->owner = owner;
|
||||
+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
|
||||
mr->ram_block = NULL;
|
||||
|
||||
if (name) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 7 Jun 2023 11:45:09 -0400
|
||||
Subject: [PATCH 15/21] memory: stricter checks prior to unsetting
|
||||
engaged_in_io
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Tue May 16 04:40:02 2023 -0400
|
||||
|
||||
memory: stricter checks prior to unsetting engaged_in_io
|
||||
|
||||
engaged_in_io could be unset by an MR with re-entrancy checks disabled.
|
||||
Ensure that only MRs that can set the engaged_in_io flag can unset it.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230516084002.3813836-1-alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
softmmu/memory.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/softmmu/memory.c b/softmmu/memory.c
|
||||
index b7b3386e9d..26424f1d78 100644
|
||||
--- a/softmmu/memory.c
|
||||
+++ b/softmmu/memory.c
|
||||
@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
unsigned access_size;
|
||||
unsigned i;
|
||||
MemTxResult r = MEMTX_OK;
|
||||
+ bool reentrancy_guard_applied = false;
|
||||
|
||||
if (!access_size_min) {
|
||||
access_size_min = 1;
|
||||
@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
return MEMTX_ACCESS_ERROR;
|
||||
}
|
||||
mr->dev->mem_reentrancy_guard.engaged_in_io = true;
|
||||
+ reentrancy_guard_applied = true;
|
||||
}
|
||||
|
||||
/* FIXME: support unaligned access? */
|
||||
@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_mask, attrs);
|
||||
}
|
||||
}
|
||||
- if (mr->dev) {
|
||||
+ if (mr->dev && reentrancy_guard_applied) {
|
||||
mr->dev->mem_reentrancy_guard.engaged_in_io = false;
|
||||
}
|
||||
return r;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,162 +0,0 @@
|
||||
From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 10/37] migration: Add switchover ack capability
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit 6574232fff6a
|
||||
Author: Avihai Horon <avihaih@nvidia.com>
|
||||
Date: Wed Jun 21 14:11:54 2023 +0300
|
||||
|
||||
migration: Add switchover ack capability
|
||||
|
||||
Migration downtime estimation is calculated based on bandwidth and
|
||||
remaining migration data. This assumes that loading of migration data in
|
||||
the destination takes a negligible amount of time and that downtime
|
||||
depends only on network speed.
|
||||
|
||||
While this may be true for RAM, it's not necessarily true for other
|
||||
migrated devices. For example, loading the data of a VFIO device in the
|
||||
destination might require from the device to allocate resources, prepare
|
||||
internal data structures and so on. These operations can take a
|
||||
significant amount of time which can increase migration downtime.
|
||||
|
||||
This patch adds a new capability "switchover ack" that prevents the
|
||||
source from stopping the VM and completing the migration until an ACK
|
||||
is received from the destination that it's OK to do so.
|
||||
|
||||
This can be used by migrated devices in various ways to reduce downtime.
|
||||
For example, a device can send initial precopy metadata to pre-allocate
|
||||
resources in the destination and use this capability to make sure that
|
||||
the pre-allocation is completed before the source VM is stopped, so it
|
||||
will have full effect.
|
||||
|
||||
This new capability relies on the return path capability to communicate
|
||||
from the destination back to the source.
|
||||
|
||||
The actual implementation of the capability will be added in the
|
||||
following patches.
|
||||
|
||||
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Acked-by: Markus Armbruster <armbru@redhat.com>
|
||||
Tested-by: YangHang Liu <yanghliu@redhat.com>
|
||||
Acked-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
- qapi/migration.json
|
||||
re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1:
|
||||
unexpected de-indent (expected at least 17 spaces)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
migration/options.c | 21 +++++++++++++++++++++
|
||||
migration/options.h | 1 +
|
||||
qapi/migration.json | 14 +++++++++++++-
|
||||
3 files changed, 35 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/migration/options.c b/migration/options.c
|
||||
index a76984276d..c3df6c6dde 100644
|
||||
--- a/migration/options.c
|
||||
+++ b/migration/options.c
|
||||
@@ -182,6 +182,8 @@ Property migration_properties[] = {
|
||||
DEFINE_PROP_MIG_CAP("x-zero-copy-send",
|
||||
MIGRATION_CAPABILITY_ZERO_COPY_SEND),
|
||||
#endif
|
||||
+ DEFINE_PROP_MIG_CAP("x-switchover-ack",
|
||||
+ MIGRATION_CAPABILITY_SWITCHOVER_ACK),
|
||||
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
@@ -305,6 +307,13 @@ bool migrate_return_path(void)
|
||||
return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
|
||||
}
|
||||
|
||||
+bool migrate_switchover_ack(void)
|
||||
+{
|
||||
+ MigrationState *s = migrate_get_current();
|
||||
+
|
||||
+ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK];
|
||||
+}
|
||||
+
|
||||
bool migrate_validate_uuid(void)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) {
|
||||
+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
|
||||
+ error_setg(errp, "Capability 'switchover-ack' requires capability "
|
||||
+ "'return-path'");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* Disable this capability until it's implemented */
|
||||
+ error_setg(errp, "'switchover-ack' is not implemented yet");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
diff --git a/migration/options.h b/migration/options.h
|
||||
index 7b0f7245ad..0fc7be6869 100644
|
||||
--- a/migration/options.h
|
||||
+++ b/migration/options.h
|
||||
@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void);
|
||||
bool migrate_rdma_pin_all(void);
|
||||
bool migrate_release_ram(void);
|
||||
bool migrate_return_path(void);
|
||||
+bool migrate_switchover_ack(void);
|
||||
bool migrate_validate_uuid(void);
|
||||
bool migrate_xbzrle(void);
|
||||
bool migrate_zero_blocks(void);
|
||||
diff --git a/qapi/migration.json b/qapi/migration.json
|
||||
index 2c35b7b9cf..b6a58347cc 100644
|
||||
--- a/qapi/migration.json
|
||||
+++ b/qapi/migration.json
|
||||
@@ -478,6 +478,18 @@
|
||||
# should not affect the correctness of postcopy migration.
|
||||
# (since 7.1)
|
||||
#
|
||||
+# @switchover-ack: If enabled, migration will not stop the source VM
|
||||
+# and complete the migration until an ACK is received
|
||||
+# from the destination that it's OK to do so.
|
||||
+# Exactly when this ACK is sent depends on the
|
||||
+# migrated devices that use this feature. For
|
||||
+# example, a device can use it to make sure some of
|
||||
+# its data is sent and loaded in the destination
|
||||
+# before doing switchover. This can reduce downtime
|
||||
+# if devices that support this capability are
|
||||
+# present. 'return-path' capability must be enabled
|
||||
+# to use it. (since 8.1)
|
||||
+#
|
||||
# Features:
|
||||
# @unstable: Members @x-colo and @x-ignore-shared are experimental.
|
||||
#
|
||||
@@ -492,7 +504,7 @@
|
||||
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
|
||||
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
|
||||
'validate-uuid', 'background-snapshot',
|
||||
- 'zero-copy-send', 'postcopy-preempt'] }
|
||||
+ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] }
|
||||
|
||||
##
|
||||
# @MigrationCapabilityStatus:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,308 +0,0 @@
|
||||
From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Xu <peterx@redhat.com>
|
||||
Date: Tue, 25 Apr 2023 21:15:14 -0400
|
||||
Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to
|
||||
report err
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types
|
||||
RH-Bugzilla: 2057267
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm)
|
||||
|
||||
Instead of printing it to STDERR, bring the error upwards so that it can be
|
||||
reported via QMP responses.
|
||||
|
||||
E.g.:
|
||||
|
||||
{ "execute": "migrate-set-capabilities" ,
|
||||
"arguments": { "capabilities":
|
||||
[ { "capability": "postcopy-ram", "state": true } ] } }
|
||||
|
||||
{ "error":
|
||||
{ "class": "GenericError",
|
||||
"desc": "Postcopy is not supported: Host backend files need to be TMPFS
|
||||
or HUGETLBFS only" } }
|
||||
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877)
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
migration/options.c | 8 ++----
|
||||
migration/postcopy-ram.c | 60 +++++++++++++++++++++-------------------
|
||||
migration/postcopy-ram.h | 3 +-
|
||||
migration/savevm.c | 3 +-
|
||||
4 files changed, 39 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/migration/options.c b/migration/options.c
|
||||
index 4701c75a4d..e51d667e14 100644
|
||||
--- a/migration/options.c
|
||||
+++ b/migration/options.c
|
||||
@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
|
||||
{
|
||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||
|
||||
+ ERRP_GUARD();
|
||||
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
|
||||
if (new_caps[MIGRATION_CAPABILITY_BLOCK]) {
|
||||
error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
|
||||
@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
|
||||
*/
|
||||
if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] &&
|
||||
runstate_check(RUN_STATE_INMIGRATE) &&
|
||||
- !postcopy_ram_supported_by_host(mis)) {
|
||||
- /* postcopy_ram_supported_by_host will have emitted a more
|
||||
- * detailed message
|
||||
- */
|
||||
- error_setg(errp, "Postcopy is not supported");
|
||||
+ !postcopy_ram_supported_by_host(mis, errp)) {
|
||||
+ error_prepend(errp, "Postcopy is not supported: ");
|
||||
return false;
|
||||
}
|
||||
|
||||
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
|
||||
index 0711500036..75aa276bb1 100644
|
||||
--- a/migration/postcopy-ram.c
|
||||
+++ b/migration/postcopy-ram.c
|
||||
@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features)
|
||||
return true;
|
||||
}
|
||||
|
||||
-static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
|
||||
+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis,
|
||||
+ Error **errp)
|
||||
{
|
||||
uint64_t asked_features = 0;
|
||||
static uint64_t supported_features;
|
||||
|
||||
+ ERRP_GUARD();
|
||||
/*
|
||||
* it's not possible to
|
||||
* request UFFD_API twice per one fd
|
||||
@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
|
||||
*/
|
||||
if (!supported_features) {
|
||||
if (!receive_ufd_features(&supported_features)) {
|
||||
- error_report("%s failed", __func__);
|
||||
+ error_setg(errp, "Userfault feature detection failed");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
|
||||
* userfault file descriptor
|
||||
*/
|
||||
if (!request_ufd_features(ufd, asked_features)) {
|
||||
- error_report("%s failed: features %" PRIu64, __func__,
|
||||
- asked_features);
|
||||
+ error_setg(errp, "Failed features %" PRIu64, asked_features);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
|
||||
have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS;
|
||||
#endif
|
||||
if (!have_hp) {
|
||||
- error_report("Userfault on this host does not support huge pages");
|
||||
+ error_setg(errp,
|
||||
+ "Userfault on this host does not support huge pages");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
|
||||
|
||||
/* Callback from postcopy_ram_supported_by_host block iterator.
|
||||
*/
|
||||
-static int test_ramblock_postcopiable(RAMBlock *rb)
|
||||
+static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp)
|
||||
{
|
||||
const char *block_name = qemu_ram_get_idstr(rb);
|
||||
ram_addr_t length = qemu_ram_get_used_length(rb);
|
||||
@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb)
|
||||
QemuFsType fs;
|
||||
|
||||
if (length % pagesize) {
|
||||
- error_report("Postcopy requires RAM blocks to be a page size multiple,"
|
||||
- " block %s is 0x" RAM_ADDR_FMT " bytes with a "
|
||||
- "page size of 0x%zx", block_name, length, pagesize);
|
||||
+ error_setg(errp,
|
||||
+ "Postcopy requires RAM blocks to be a page size multiple,"
|
||||
+ " block %s is 0x" RAM_ADDR_FMT " bytes with a "
|
||||
+ "page size of 0x%zx", block_name, length, pagesize);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (rb->fd >= 0) {
|
||||
fs = qemu_fd_getfs(rb->fd);
|
||||
if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) {
|
||||
- error_report("Host backend files need to be TMPFS or HUGETLBFS only");
|
||||
+ error_setg(errp,
|
||||
+ "Host backend files need to be TMPFS or HUGETLBFS only");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb)
|
||||
* normally fine since if the postcopy succeeds it gets turned back on at the
|
||||
* end.
|
||||
*/
|
||||
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
|
||||
{
|
||||
long pagesize = qemu_real_host_page_size();
|
||||
int ufd = -1;
|
||||
@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
struct uffdio_register reg_struct;
|
||||
struct uffdio_range range_struct;
|
||||
uint64_t feature_mask;
|
||||
- Error *local_err = NULL;
|
||||
RAMBlock *block;
|
||||
|
||||
+ ERRP_GUARD();
|
||||
if (qemu_target_page_size() > pagesize) {
|
||||
- error_report("Target page size bigger than host page size");
|
||||
+ error_setg(errp, "Target page size bigger than host page size");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ufd = uffd_open(O_CLOEXEC);
|
||||
if (ufd == -1) {
|
||||
- error_report("%s: userfaultfd not available: %s", __func__,
|
||||
- strerror(errno));
|
||||
+ error_setg(errp, "Userfaultfd not available: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Give devices a chance to object */
|
||||
- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) {
|
||||
- error_report_err(local_err);
|
||||
+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Version and features check */
|
||||
- if (!ufd_check_and_apply(ufd, mis)) {
|
||||
+ if (!ufd_check_and_apply(ufd, mis, errp)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
* affect in reality, or we can revisit.
|
||||
*/
|
||||
RAMBLOCK_FOREACH(block) {
|
||||
- if (test_ramblock_postcopiable(block)) {
|
||||
+ if (test_ramblock_postcopiable(block, errp)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
* it was enabled.
|
||||
*/
|
||||
if (munlockall()) {
|
||||
- error_report("%s: munlockall: %s", __func__, strerror(errno));
|
||||
+ error_setg(errp, "munlockall() failed: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
|
||||
MAP_ANONYMOUS, -1, 0);
|
||||
if (testarea == MAP_FAILED) {
|
||||
- error_report("%s: Failed to map test area: %s", __func__,
|
||||
- strerror(errno));
|
||||
+ error_setg(errp, "Failed to map test area: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize));
|
||||
@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
|
||||
|
||||
if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
|
||||
- error_report("%s userfault register: %s", __func__, strerror(errno));
|
||||
+ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
range_struct.start = (uintptr_t)testarea;
|
||||
range_struct.len = pagesize;
|
||||
if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
|
||||
- error_report("%s userfault unregister: %s", __func__, strerror(errno));
|
||||
+ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
(__u64)1 << _UFFDIO_COPY |
|
||||
(__u64)1 << _UFFDIO_ZEROPAGE;
|
||||
if ((reg_struct.ioctls & feature_mask) != feature_mask) {
|
||||
- error_report("Missing userfault map features: %" PRIx64,
|
||||
- (uint64_t)(~reg_struct.ioctls & feature_mask));
|
||||
+ error_setg(errp, "Missing userfault map features: %" PRIx64,
|
||||
+ (uint64_t)(~reg_struct.ioctls & feature_mask));
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis)
|
||||
|
||||
int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
|
||||
{
|
||||
+ Error *local_err = NULL;
|
||||
+
|
||||
/* Open the fd for the kernel to give us userfaults */
|
||||
mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK);
|
||||
if (mis->userfault_fd == -1) {
|
||||
@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
|
||||
* Although the host check already tested the API, we need to
|
||||
* do the check again as an ABI handshake on the new fd.
|
||||
*/
|
||||
- if (!ufd_check_and_apply(mis->userfault_fd, mis)) {
|
||||
+ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) {
|
||||
+ error_report_err(local_err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
|
||||
{
|
||||
}
|
||||
|
||||
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
|
||||
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
|
||||
{
|
||||
error_report("%s: No OS support", __func__);
|
||||
return false;
|
||||
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
|
||||
index b4867a32d5..442ab89752 100644
|
||||
--- a/migration/postcopy-ram.h
|
||||
+++ b/migration/postcopy-ram.h
|
||||
@@ -14,7 +14,8 @@
|
||||
#define QEMU_POSTCOPY_RAM_H
|
||||
|
||||
/* Return true if the host supports everything we need to do postcopy-ram */
|
||||
-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
|
||||
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
|
||||
+ Error **errp);
|
||||
|
||||
/*
|
||||
* Make all of RAM sensitive to accesses to areas that haven't yet been written
|
||||
diff --git a/migration/savevm.c b/migration/savevm.c
|
||||
index 9671211339..211eff3a8b 100644
|
||||
--- a/migration/savevm.c
|
||||
+++ b/migration/savevm.c
|
||||
@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (!postcopy_ram_supported_by_host(mis)) {
|
||||
+ if (!postcopy_ram_supported_by_host(mis, &local_err)) {
|
||||
+ error_report_err(local_err);
|
||||
postcopy_state_set(POSTCOPY_INCOMING_NONE);
|
||||
return -1;
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue