forked from rpms/qemu-kvm
parent
de74d18eb7
commit
2c6097766c
@ -1 +1 @@
|
||||
SOURCES/qemu-7.2.0.tar.xz
|
||||
SOURCES/qemu-8.0.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz
|
||||
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <dgibson@redhat.com>
|
||||
Date: Wed, 6 Feb 2019 03:58:56 +0000
|
||||
Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts
|
||||
|
||||
RH-Author: David Gibson <dgibson@redhat.com>
|
||||
Message-id: <20190206035856.19058-1-dgibson@redhat.com>
|
||||
Patchwork-id: 84246
|
||||
O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts
|
||||
Bugzilla: 1653590
|
||||
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-Acked-by: Serhii Popovych <spopovyc@redhat.com>
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Most current POWER guests require 64kiB page support, so that's the default
|
||||
for the cap-hpt-max-page-size option in qemu which limits available guest
|
||||
page sizes. We warn if the value is set smaller than that, but don't
|
||||
outright fail upstream, because we need to allow for the possibility of
|
||||
guest (and/or host) kernels configured for 4kiB page sizes.
|
||||
|
||||
Downstream, however, we simply don't support 4kiB pagesize configured
|
||||
kernels in guest or host, so we can have qemu simply error out in this
|
||||
situation.
|
||||
|
||||
Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified
|
||||
it failed immediately with a qemu error
|
||||
|
||||
Signed-off-by: David Gibson <dgibson@redhat.com>
|
||||
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
||||
---
|
||||
hw/ppc/spapr_caps.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
|
||||
index b4283055c1..59b88aadff 100644
|
||||
--- a/hw/ppc/spapr_caps.c
|
||||
+++ b/hw/ppc/spapr_caps.c
|
||||
@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
||||
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
|
||||
uint8_t val, Error **errp)
|
||||
{
|
||||
+#if 0 /* disabled for RHEL */
|
||||
if (val < 12) {
|
||||
error_setg(errp, "Require at least 4kiB hpt-max-page-size");
|
||||
return;
|
||||
} else if (val < 16) {
|
||||
warn_report("Many guests require at least 64kiB hpt-max-page-size");
|
||||
}
|
||||
+#else /* Only page sizes >=64kiB supported for RHEL */
|
||||
+ if (val < 16) {
|
||||
+ error_setg(errp, "Require at least 64kiB hpt-max-page-size");
|
||||
+ return;
|
||||
+ }
|
||||
+#endif
|
||||
|
||||
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,110 @@
|
||||
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 15 Feb 2023 02:03:17 -0500
|
||||
Subject: Add RHEL 9.2.0 compat structure
|
||||
|
||||
Adding compatibility bits necessary to keep 9.2.0 machine
|
||||
types same after rebase to 8.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
Rebase notes (8.0.0 rc4):
|
||||
- Added migration.x-preempt-pre-7-2 compat
|
||||
---
|
||||
hw/arm/virt.c | 1 +
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 20 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 1ae1654be5..9be53e9355 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5aa567fad3..0e0120b7f2 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "e1000e", "migrate-timadj", "off" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "virtio-mem", "x-early-migration", "false" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "migration", "x-preempt-pre-7-2", "true" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
+
|
||||
/*
|
||||
* Mostly the same as hw_compat_7_0
|
||||
*/
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 3e330fd36f..90fb6e2e03 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 98601bb76f..8945b69175 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.2.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index dcd3b966b0..6a0b93c63d 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 5e7446ee40..5f08bd7550 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_2[];
|
||||
+extern const size_t hw_compat_rhel_9_2_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_1[];
|
||||
extern const size_t hw_compat_rhel_9_1_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,26 +0,0 @@
|
||||
From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 9 Nov 2022 07:08:32 -0500
|
||||
Subject: Add 7.2 compat bits for RHEL 9.1 machine type
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 9edec1ca05..3d851d34da 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = {
|
||||
{ "arm-gicv3-common", "force-8-bit-prio", "on" },
|
||||
/* hw_compat_rhel_9_1 from hw_compat_7_0 */
|
||||
{ "nvme-ns", "eui64-default", "on"},
|
||||
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
|
||||
+ { "virtio-device", "queue_reset", "false" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,76 @@
|
||||
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 27 Mar 2023 15:14:03 +0200
|
||||
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
|
||||
8.0.0 update
|
||||
|
||||
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 2 ++
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 13 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 8abb1f872e..f216922cee 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
|
||||
};
|
||||
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
|
||||
+GlobalProperty pc_rhel_9_2_compat[] = {
|
||||
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
|
||||
+ { "ICH9-LPC", "noreboot", "true" },
|
||||
+};
|
||||
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
|
||||
+
|
||||
GlobalProperty pc_rhel_9_0_compat[] = {
|
||||
/* pc_rhel_9_0_compat from pc_compat_6_2 */
|
||||
{ "virtio-mem", "unplugged-inaccessible", "off" },
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 90fb6e2e03..fc704d783f 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 8945b69175..e97655616a 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 4376f64a47..d218ad1628 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
|
||||
extern GlobalProperty pc_rhel_compat[];
|
||||
extern const size_t pc_rhel_compat_len;
|
||||
|
||||
+extern GlobalProperty pc_rhel_9_2_compat[];
|
||||
+extern const size_t pc_rhel_9_2_compat_len;
|
||||
+
|
||||
extern GlobalProperty pc_rhel_9_0_compat[];
|
||||
extern const size_t pc_rhel_9_0_compat_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,83 @@
|
||||
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Mon, 17 Apr 2023 01:24:18 -0400
|
||||
Subject: Disable unwanted new devices
|
||||
|
||||
QEMU 8.0 adds two new devices we do not want to support that can't
|
||||
be disabled using a configure switch.
|
||||
|
||||
1) ide-cf - virtual CompactFlash card
|
||||
|
||||
2) i2c-echo - testing echo device
|
||||
|
||||
Use manual disabling of the devices by changing code (1) and meson configs (2).
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/ide/qdev.c | 9 +++++++++
|
||||
hw/misc/meson.build | 3 ++-
|
||||
2 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
|
||||
index 1b3b4da01d..454bfa5783 100644
|
||||
--- a/hw/ide/qdev.c
|
||||
+++ b/hw/ide/qdev.c
|
||||
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
|
||||
ide_dev_initfn(dev, IDE_CD, errp);
|
||||
}
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static void ide_cf_realize(IDEDevice *dev, Error **errp)
|
||||
{
|
||||
ide_dev_initfn(dev, IDE_CFATA, errp);
|
||||
}
|
||||
+#endif
|
||||
|
||||
#define DEFINE_IDE_DEV_PROPERTIES() \
|
||||
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
|
||||
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
|
||||
.class_init = ide_cd_class_init,
|
||||
};
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static Property ide_cf_properties[] = {
|
||||
DEFINE_IDE_DEV_PROPERTIES(),
|
||||
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
|
||||
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
|
||||
.instance_size = sizeof(IDEDrive),
|
||||
.class_init = ide_cf_class_init,
|
||||
};
|
||||
+#endif
|
||||
|
||||
static void ide_device_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
@@ -396,7 +402,10 @@ static void ide_register_types(void)
|
||||
type_register_static(&ide_bus_info);
|
||||
type_register_static(&ide_hd_info);
|
||||
type_register_static(&ide_cd_info);
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
type_register_static(&ide_cf_info);
|
||||
+#endif
|
||||
type_register_static(&ide_device_type_info);
|
||||
}
|
||||
|
||||
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
|
||||
index a40245ad44..9cc5a61ed7 100644
|
||||
--- a/hw/misc/meson.build
|
||||
+++ b/hw/misc/meson.build
|
||||
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
|
||||
|
||||
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
|
||||
|
||||
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
+# Disabled for Red Hat Enterprise Linux
|
||||
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
|
||||
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,47 +0,0 @@
|
||||
From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 16:47:16 +0100
|
||||
Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585
|
||||
Upstream Status: n/a (rhel-only)
|
||||
|
||||
Add the compatibility handling for the rebase from QEMU 7.1 to 7.2,
|
||||
i.e. the settings from ccw_machine_7_1_class_options() and
|
||||
ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type
|
||||
(earlier settings have been added by previous rebases already).
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index ba640e3d9e..97e868ada0 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
|
||||
|
||||
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
|
||||
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
|
||||
}
|
||||
|
||||
static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
{
|
||||
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
|
||||
+ static GlobalProperty compat[] = {
|
||||
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
|
||||
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
|
||||
+ };
|
||||
+
|
||||
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
+ s390mc->max_threads = S390_MAX_CPUS;
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,43 +0,0 @@
|
||||
From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001
|
||||
From: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Wed, 23 Nov 2022 14:15:37 +0100
|
||||
Subject: redhat: aarch64: add rhel9.2.0 virt machine type
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982
|
||||
Upstream: RHEL only
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index dfcab40a73..0a94f31dd1 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void)
|
||||
}
|
||||
type_init(rhel_machine_init);
|
||||
|
||||
+static void rhel920_virt_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
+
|
||||
static void rhel900_virt_options(MachineClass *mc)
|
||||
{
|
||||
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
|
||||
|
||||
+ rhel920_virt_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
}
|
||||
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
|
||||
+DEFINE_RHEL_MACHINE(9, 0, 0)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,62 +0,0 @@
|
||||
From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 17:03:24 +0100
|
||||
Subject: redhat: Add new rhel-9.2.0 s390x machine type
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473
|
||||
Upstream Status: n/a (rhel-only)
|
||||
|
||||
RHEL 9.2 will be an EUS release - we want to have a new machine
|
||||
type here to make sure that we have a spot where we can wire up
|
||||
fixes later.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++-
|
||||
1 file changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 97e868ada0..aa142a1a4e 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
|
||||
#endif
|
||||
|
||||
|
||||
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
+
|
||||
static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
{
|
||||
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
|
||||
|
||||
+ ccw_machine_rhel920_instance_options(machine);
|
||||
+
|
||||
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
|
||||
s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
|
||||
}
|
||||
@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
{ TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
|
||||
};
|
||||
|
||||
+ ccw_machine_rhel920_class_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
s390mc->max_threads = S390_MAX_CPUS;
|
||||
}
|
||||
-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
|
||||
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
|
||||
|
||||
static void ccw_machine_rhel860_instance_options(MachineState *machine)
|
||||
{
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,75 +0,0 @@
|
||||
From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 12:36:30 +0000
|
||||
Subject: x86: rhel 9.2.0 machine type
|
||||
|
||||
Add a 9.2.0 x86 machine type, and fix up the compatibility
|
||||
for 9.0.0 and older.
|
||||
|
||||
pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's
|
||||
nothing to do there.
|
||||
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
hw/i386/pc_q35.c | 21 ++++++++++++++++++++-
|
||||
2 files changed, 21 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 173a1fd10b..fc06877344 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
+ pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 97c3630021..52cfe3bf45 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
|
||||
}
|
||||
|
||||
+static void pc_q35_init_rhel920(MachineState *machine)
|
||||
+{
|
||||
+ pc_q35_init(machine);
|
||||
+}
|
||||
+
|
||||
+static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
+{
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
+ pc_q35_machine_rhel_options(m);
|
||||
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
+ pcmc->smbios_stream_product = "RHEL";
|
||||
+ pcmc->smbios_stream_version = "9.2.0";
|
||||
+}
|
||||
+
|
||||
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
+ pc_q35_machine_rhel920_options);
|
||||
+
|
||||
static void pc_q35_init_rhel900(MachineState *machine)
|
||||
{
|
||||
pc_q35_init(machine);
|
||||
@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine)
|
||||
static void pc_q35_machine_rhel900_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
- pc_q35_machine_rhel_options(m);
|
||||
+ pc_q35_machine_rhel920_options(m);
|
||||
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
|
||||
+ m->alias = NULL;
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.0.0";
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
+ pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,82 +0,0 @@
|
||||
From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:17:23 -0500
|
||||
Subject: [PATCH 30/31] KVM: keep track of running ioctls
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 138: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 1979276
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
|
||||
|
||||
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:57 2022 -0500
|
||||
|
||||
KVM: keep track of running ioctls
|
||||
|
||||
Using the new accel-blocker API, mark where ioctls are being called
|
||||
in KVM. Next, we will implement the critical section that will take
|
||||
care of performing memslots modifications atomically, therefore
|
||||
preventing any new ioctl from running and allowing the running ones
|
||||
to finish.
|
||||
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index f99b0becd8..ff660fd469 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
|
||||
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
|
||||
|
||||
s->sigmask_len = 8;
|
||||
+ accel_blocker_init();
|
||||
|
||||
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
||||
QTAILQ_INIT(&s->kvm_sw_breakpoints);
|
||||
@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vm_ioctl(type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(s->vmfd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
|
||||
+ accel_cpu_ioctl_begin(cpu);
|
||||
ret = ioctl(cpu->kvm_fd, type, arg);
|
||||
+ accel_cpu_ioctl_end(cpu);
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_device_ioctl(fd, type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(fd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,140 +0,0 @@
|
||||
From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 19 Jan 2023 18:24:24 +0100
|
||||
Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in
|
||||
vhost_user_read()"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 146: Fix vhost-user with dpdk
|
||||
RH-Bugzilla: 2155173
|
||||
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
||||
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
|
||||
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos)
|
||||
|
||||
This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692.
|
||||
|
||||
The nested event loop is broken by design. Its only user was removed.
|
||||
Drop the code as well so that nobody ever tries to use it again.
|
||||
|
||||
I had to fix a couple of trivial conflicts around return values because
|
||||
of 025faa872bcf ("vhost-user: stick to -errno error return convention").
|
||||
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20230119172424.478268-3-groug@kaod.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
|
||||
(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f)
|
||||
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
|
||||
---
|
||||
hw/virtio/vhost-user.c | 65 ++++--------------------------------------
|
||||
1 file changed, 5 insertions(+), 60 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||
index 0ac00eb901..7cb49c50f9 100644
|
||||
--- a/hw/virtio/vhost-user.c
|
||||
+++ b/hw/virtio/vhost-user.c
|
||||
@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-struct vhost_user_read_cb_data {
|
||||
- struct vhost_dev *dev;
|
||||
- VhostUserMsg *msg;
|
||||
- GMainLoop *loop;
|
||||
- int ret;
|
||||
-};
|
||||
-
|
||||
-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
- gpointer opaque)
|
||||
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
{
|
||||
- struct vhost_user_read_cb_data *data = opaque;
|
||||
- struct vhost_dev *dev = data->dev;
|
||||
- VhostUserMsg *msg = data->msg;
|
||||
struct vhost_user *u = dev->opaque;
|
||||
CharBackend *chr = u->user->chr;
|
||||
uint8_t *p = (uint8_t *) msg;
|
||||
@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
|
||||
r = vhost_user_read_header(dev, msg);
|
||||
if (r < 0) {
|
||||
- data->ret = r;
|
||||
- goto end;
|
||||
+ return r;
|
||||
}
|
||||
|
||||
/* validate message size is sane */
|
||||
@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
error_report("Failed to read msg header."
|
||||
" Size %d exceeds the maximum %zu.", msg->hdr.size,
|
||||
VHOST_USER_PAYLOAD_SIZE);
|
||||
- data->ret = -EPROTO;
|
||||
- goto end;
|
||||
+ return -EPROTO;
|
||||
}
|
||||
|
||||
if (msg->hdr.size) {
|
||||
@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
int saved_errno = errno;
|
||||
error_report("Failed to read msg payload."
|
||||
" Read %d instead of %d.", r, msg->hdr.size);
|
||||
- data->ret = r < 0 ? -saved_errno : -EIO;
|
||||
- goto end;
|
||||
+ return r < 0 ? -saved_errno : -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
-end:
|
||||
- g_main_loop_quit(data->loop);
|
||||
- return G_SOURCE_REMOVE;
|
||||
-}
|
||||
-
|
||||
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
-{
|
||||
- struct vhost_user *u = dev->opaque;
|
||||
- CharBackend *chr = u->user->chr;
|
||||
- GMainContext *prev_ctxt = chr->chr->gcontext;
|
||||
- GMainContext *ctxt = g_main_context_new();
|
||||
- GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
|
||||
- struct vhost_user_read_cb_data data = {
|
||||
- .dev = dev,
|
||||
- .loop = loop,
|
||||
- .msg = msg,
|
||||
- .ret = 0
|
||||
- };
|
||||
-
|
||||
- /*
|
||||
- * We want to be able to monitor the slave channel fd while waiting
|
||||
- * for chr I/O. This requires an event loop, but we can't nest the
|
||||
- * one to which chr is currently attached : its fd handlers might not
|
||||
- * be prepared for re-entrancy. So we create a new one and switch chr
|
||||
- * to use it.
|
||||
- */
|
||||
- qemu_chr_be_update_read_handlers(chr->chr, ctxt);
|
||||
- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
|
||||
-
|
||||
- g_main_loop_run(loop);
|
||||
-
|
||||
- /*
|
||||
- * Restore the previous event loop context. This also destroys/recreates
|
||||
- * event sources : this guarantees that all pending events in the original
|
||||
- * context that have been processed by the nested loop are purged.
|
||||
- */
|
||||
- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
|
||||
-
|
||||
- g_main_loop_unref(loop);
|
||||
- g_main_context_unref(ctxt);
|
||||
-
|
||||
- return data.ret;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int process_message_reply(struct vhost_dev *dev,
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,143 +0,0 @@
|
||||
From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 19 Jan 2023 18:24:23 +0100
|
||||
Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in
|
||||
vhost_user_read()"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 146: Fix vhost-user with dpdk
|
||||
RH-Bugzilla: 2155173
|
||||
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
||||
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
|
||||
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos)
|
||||
|
||||
This reverts commit db8a3772e300c1a656331a92da0785d81667dc81.
|
||||
|
||||
Motivation : this is breaking vhost-user with DPDK as reported in [0].
|
||||
|
||||
Received unexpected msg type. Expected 22 received 40
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 40 received 22
|
||||
Received unexpected msg type. Expected 22 received 11
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 11 received 22
|
||||
vhost VQ 1 ring restore failed: -71: Protocol error (71)
|
||||
Received unexpected msg type. Expected 22 received 11
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 11 received 22
|
||||
vhost VQ 0 ring restore failed: -71: Protocol error (71)
|
||||
unable to start vhost net: 71: falling back on userspace virtio
|
||||
|
||||
The failing sequence that leads to the first error is :
|
||||
- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master
|
||||
socket
|
||||
- QEMU starts a nested event loop in order to wait for the
|
||||
VHOST_USER_GET_STATUS response and to be able to process messages from
|
||||
the slave channel
|
||||
- DPDK sends a couple of legitimate IOTLB miss messages on the slave
|
||||
channel
|
||||
- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22)
|
||||
updates on the master socket
|
||||
- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG
|
||||
but it gets the response for the VHOST_USER_GET_STATUS instead
|
||||
|
||||
The subsequent errors have the same root cause : the nested event loop
|
||||
breaks the order by design. It lures QEMU to expect responses to the
|
||||
latest message sent on the master socket to arrive first.
|
||||
|
||||
Since this was only needed for DAX enablement which is still not merged
|
||||
upstream, just drop the code for now. A working solution will have to
|
||||
be merged later on. Likely protect the master socket with a mutex
|
||||
and service the slave channel with a separate thread, as discussed with
|
||||
Maxime in the mail thread below.
|
||||
|
||||
[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/
|
||||
|
||||
Reported-by: Yanghang Liu <yanghliu@redhat.com>
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20230119172424.478268-2-groug@kaod.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
|
||||
(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c)
|
||||
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
|
||||
---
|
||||
hw/virtio/vhost-user.c | 35 +++--------------------------------
|
||||
1 file changed, 3 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||
index 8f635844af..0ac00eb901 100644
|
||||
--- a/hw/virtio/vhost-user.c
|
||||
+++ b/hw/virtio/vhost-user.c
|
||||
@@ -356,35 +356,6 @@ end:
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
|
||||
- gpointer opaque);
|
||||
-
|
||||
-/*
|
||||
- * This updates the read handler to use a new event loop context.
|
||||
- * Event sources are removed from the previous context : this ensures
|
||||
- * that events detected in the previous context are purged. They will
|
||||
- * be re-detected and processed in the new context.
|
||||
- */
|
||||
-static void slave_update_read_handler(struct vhost_dev *dev,
|
||||
- GMainContext *ctxt)
|
||||
-{
|
||||
- struct vhost_user *u = dev->opaque;
|
||||
-
|
||||
- if (!u->slave_ioc) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (u->slave_src) {
|
||||
- g_source_destroy(u->slave_src);
|
||||
- g_source_unref(u->slave_src);
|
||||
- }
|
||||
-
|
||||
- u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
|
||||
- G_IO_IN | G_IO_HUP,
|
||||
- slave_read, dev, NULL,
|
||||
- ctxt);
|
||||
-}
|
||||
-
|
||||
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
{
|
||||
struct vhost_user *u = dev->opaque;
|
||||
@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
* be prepared for re-entrancy. So we create a new one and switch chr
|
||||
* to use it.
|
||||
*/
|
||||
- slave_update_read_handler(dev, ctxt);
|
||||
qemu_chr_be_update_read_handlers(chr->chr, ctxt);
|
||||
qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
|
||||
|
||||
@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
* context that have been processed by the nested loop are purged.
|
||||
*/
|
||||
qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
|
||||
- slave_update_read_handler(dev, NULL);
|
||||
|
||||
g_main_loop_unref(loop);
|
||||
g_main_context_unref(ctxt);
|
||||
@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
|
||||
return -ECONNREFUSED;
|
||||
}
|
||||
u->slave_ioc = ioc;
|
||||
- slave_update_read_handler(dev, NULL);
|
||||
+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
|
||||
+ G_IO_IN | G_IO_HUP,
|
||||
+ slave_read, dev, NULL, NULL);
|
||||
|
||||
if (reply_supported) {
|
||||
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,348 +0,0 @@
|
||||
From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:16:41 -0500
|
||||
Subject: [PATCH 29/31] accel: introduce accelerator blocker API
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 138: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 1979276
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
|
||||
|
||||
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:56 2022 -0500
|
||||
|
||||
accel: introduce accelerator blocker API
|
||||
|
||||
This API allows the accelerators to prevent vcpus from issuing
|
||||
new ioctls while executing a critical section marked with the
|
||||
accel_ioctl_inhibit_begin/end functions.
|
||||
|
||||
Note that all functions submitting ioctls must mark where the
|
||||
ioctl is being called with accel_{cpu_}ioctl_begin/end().
|
||||
|
||||
This API requires the caller to always hold the BQL.
|
||||
API documentation is in sysemu/accel-blocker.h
|
||||
|
||||
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
|
||||
(to minimize cache line bouncing) to ensure that no new ioctls
|
||||
run when the critical section starts, and a QemuEvent to wait
|
||||
that all running ioctls finish.
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
util/meson.build: "interval-tree.c" does not exist
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
|
||||
accel/meson.build | 2 +-
|
||||
hw/core/cpu-common.c | 2 +
|
||||
include/hw/core/cpu.h | 3 +
|
||||
include/sysemu/accel-blocker.h | 56 ++++++++++++
|
||||
util/meson.build | 2 +-
|
||||
6 files changed, 217 insertions(+), 2 deletions(-)
|
||||
create mode 100644 accel/accel-blocker.c
|
||||
create mode 100644 include/sysemu/accel-blocker.h
|
||||
|
||||
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
|
||||
new file mode 100644
|
||||
index 0000000000..1e7f423462
|
||||
--- /dev/null
|
||||
+++ b/accel/accel-blocker.c
|
||||
@@ -0,0 +1,154 @@
|
||||
+/*
|
||||
+ * Lock to inhibit accelerator ioctls
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
+ * of this software and associated documentation files (the "Software"), to deal
|
||||
+ * in the Software without restriction, including without limitation the rights
|
||||
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
+ * copies of the Software, and to permit persons to whom the Software is
|
||||
+ * furnished to do so, subject to the following conditions:
|
||||
+ *
|
||||
+ * The above copyright notice and this permission notice shall be included in
|
||||
+ * all copies or substantial portions of the Software.
|
||||
+ *
|
||||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
+ * THE SOFTWARE.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
+#include "hw/core/cpu.h"
|
||||
+#include "sysemu/accel-blocker.h"
|
||||
+
|
||||
+static QemuLockCnt accel_in_ioctl_lock;
|
||||
+static QemuEvent accel_in_ioctl_event;
|
||||
+
|
||||
+void accel_blocker_init(void)
|
||||
+{
|
||||
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
|
||||
+ qemu_event_init(&accel_in_ioctl_event, false);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_begin(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_end(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_begin(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_end(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+static bool accel_has_to_wait(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+ bool needs_to_wait = false;
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
|
||||
+ /* exit the ioctl, if vcpu is running it */
|
||||
+ qemu_cpu_kick(cpu);
|
||||
+ needs_to_wait = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_begin(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ /*
|
||||
+ * We allow to inhibit only when holding the BQL, so we can identify
|
||||
+ * when an inhibitor wants to issue an ioctl easily.
|
||||
+ */
|
||||
+ g_assert(qemu_mutex_iothread_locked());
|
||||
+
|
||||
+ /* Block further invocations of the ioctls outside the BQL. */
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
|
||||
+
|
||||
+ /* Keep waiting while there are running ioctls */
|
||||
+ while (true) {
|
||||
+
|
||||
+ /* Reset event to FREE. */
|
||||
+ qemu_event_reset(&accel_in_ioctl_event);
|
||||
+
|
||||
+ if (accel_has_to_wait()) {
|
||||
+ /*
|
||||
+ * If event is still FREE, and there are ioctls still in progress,
|
||||
+ * wait.
|
||||
+ *
|
||||
+ * If an ioctl finishes before qemu_event_wait(), it will change
|
||||
+ * the event state to SET. This will prevent qemu_event_wait() from
|
||||
+ * blocking, but it's not a problem because if other ioctls are
|
||||
+ * still running the loop will iterate once more and reset the event
|
||||
+ * status to FREE so that it can wait properly.
|
||||
+ *
|
||||
+ * If an ioctl finishes while qemu_event_wait() is blocking, then
|
||||
+ * it will be woken up, but also here the while loop makes sure
|
||||
+ * to re-enter the wait if there are other running ioctls.
|
||||
+ */
|
||||
+ qemu_event_wait(&accel_in_ioctl_event);
|
||||
+ } else {
|
||||
+ /* No ioctl is running */
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_end(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
diff --git a/accel/meson.build b/accel/meson.build
|
||||
index 259c35c4c8..061332610f 100644
|
||||
--- a/accel/meson.build
|
||||
+++ b/accel/meson.build
|
||||
@@ -1,4 +1,4 @@
|
||||
-specific_ss.add(files('accel-common.c'))
|
||||
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
|
||||
softmmu_ss.add(files('accel-softmmu.c'))
|
||||
user_ss.add(files('accel-user.c'))
|
||||
|
||||
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
|
||||
index f9fdd46b9d..8d6a4b1b65 100644
|
||||
--- a/hw/core/cpu-common.c
|
||||
+++ b/hw/core/cpu-common.c
|
||||
@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
|
||||
cpu->nr_threads = 1;
|
||||
|
||||
qemu_mutex_init(&cpu->work_mutex);
|
||||
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
|
||||
QSIMPLEQ_INIT(&cpu->work_list);
|
||||
QTAILQ_INIT(&cpu->breakpoints);
|
||||
QTAILQ_INIT(&cpu->watchpoints);
|
||||
@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
|
||||
{
|
||||
CPUState *cpu = CPU(obj);
|
||||
|
||||
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
|
||||
qemu_mutex_destroy(&cpu->work_mutex);
|
||||
}
|
||||
|
||||
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
||||
index 8830546121..2417597236 100644
|
||||
--- a/include/hw/core/cpu.h
|
||||
+++ b/include/hw/core/cpu.h
|
||||
@@ -398,6 +398,9 @@ struct CPUState {
|
||||
uint32_t kvm_fetch_index;
|
||||
uint64_t dirty_pages;
|
||||
|
||||
+ /* Used by accel-blocker: CPU is executing an ioctl() */
|
||||
+ QemuLockCnt in_ioctl_lock;
|
||||
+
|
||||
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
|
||||
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
|
||||
new file mode 100644
|
||||
index 0000000000..72020529ef
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/accel-blocker.h
|
||||
@@ -0,0 +1,56 @@
|
||||
+/*
|
||||
+ * Accelerator blocking API, to prevent new ioctls from starting and wait for the
|
||||
+ * running ones to finish.
|
||||
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
|
||||
+ * release the BQL.
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+#ifndef ACCEL_BLOCKER_H
|
||||
+#define ACCEL_BLOCKER_H
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/cpus.h"
|
||||
+
|
||||
+extern void accel_blocker_init(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_{cpu_}ioctl_begin/end:
|
||||
+ * Mark when ioctl is about to run or just finished.
|
||||
+ *
|
||||
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
|
||||
+ * called, preventing new ioctls to run. They will continue only after
|
||||
+ * accel_ioctl_inhibit_end().
|
||||
+ */
|
||||
+extern void accel_ioctl_begin(void);
|
||||
+extern void accel_ioctl_end(void);
|
||||
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
|
||||
+extern void accel_cpu_ioctl_end(CPUState *cpu);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_begin: start critical section
|
||||
+ *
|
||||
+ * This function makes sure that:
|
||||
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
|
||||
+ * 2) wait that all ioctls that were already running reach
|
||||
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
|
||||
+ *
|
||||
+ * This allows the caller to access shared data or perform operations without
|
||||
+ * worrying of concurrent vcpus accesses.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_begin(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_end: end critical section started by
|
||||
+ * accel_ioctl_inhibit_begin()
|
||||
+ *
|
||||
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_end(void);
|
||||
+
|
||||
+#endif /* ACCEL_BLOCKER_H */
|
||||
diff --git a/util/meson.build b/util/meson.build
|
||||
index 25b9b61f98..85a5504c4d 100644
|
||||
--- a/util/meson.build
|
||||
+++ b/util/meson.build
|
||||
@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
|
||||
util_ss.add(files('yank.c'))
|
||||
util_ss.add(files('int128.c'))
|
||||
util_ss.add(files('memalign.c'))
|
||||
+util_ss.add(files('lockcnt.c'))
|
||||
|
||||
if have_user
|
||||
util_ss.add(files('selfmap.c'))
|
||||
@@ -71,7 +72,6 @@ endif
|
||||
if have_block or have_ga
|
||||
util_ss.add(files('aiocb.c', 'async.c'))
|
||||
util_ss.add(files('base64.c'))
|
||||
- util_ss.add(files('lockcnt.c'))
|
||||
util_ss.add(files('main-loop.c'))
|
||||
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
|
||||
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,58 +0,0 @@
|
||||
From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Fri, 3 Feb 2023 18:15:10 +0100
|
||||
Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
|
||||
RH-Bugzilla: 2165280
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Shaoqin Huang <None>
|
||||
RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2165280
|
||||
Upstream: yes
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041
|
||||
Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEGV anymore
|
||||
|
||||
After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
|
||||
before registration"), it looks like the CPUJumpCache pointer can be NULL.
|
||||
This causes a SIGSEGV when running debug-wp-migration kvm unit test.
|
||||
|
||||
In the first place, it should be clarified why this TCG code is called
|
||||
with KVM acceleration. This may hide another bug.
|
||||
|
||||
Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
|
||||
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
accel/tcg/cputlb.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
|
||||
index 6f1c00682b..4244b0e4e3 100644
|
||||
--- a/accel/tcg/cputlb.c
|
||||
+++ b/accel/tcg/cputlb.c
|
||||
@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
|
||||
|
||||
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
|
||||
{
|
||||
- int i, i0 = tb_jmp_cache_hash_page(page_addr);
|
||||
CPUJumpCache *jc = cpu->tb_jmp_cache;
|
||||
+ int i, i0;
|
||||
|
||||
+ if (unlikely(!jc)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ i0 = tb_jmp_cache_hash_page(page_addr);
|
||||
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
|
||||
qatomic_set(&jc->array[i0 + i].tb, NULL);
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,90 +0,0 @@
|
||||
From 244e92fea388d2be9fe81a5c5912d92b8f599caa Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 23 Mar 2023 10:48:59 -0400
|
||||
Subject: [PATCH 1/2] aio-posix: fix race between epoll upgrade and
|
||||
aio_set_fd_handler()
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 292: aio-posix: fix race between epoll upgrade and aio_set_fd_handler()
|
||||
RH-Bugzilla: 2211923
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Commit: [1/1] 182471bac79fa2b2ae8a34087eb6c4ab1af786e1
|
||||
|
||||
If another thread calls aio_set_fd_handler() while the IOThread event
|
||||
loop is upgrading from ppoll(2) to epoll(7) then we might miss new
|
||||
AioHandlers. The epollfd will not monitor the new AioHandler's fd,
|
||||
resulting in hangs.
|
||||
|
||||
Take the AioHandler list lock while upgrading to epoll. This prevents
|
||||
AioHandlers from changing while epoll is being set up. If we cannot lock
|
||||
because we're in a nested event loop, then don't upgrade to epoll (it
|
||||
will happen next time we're not in a nested call).
|
||||
|
||||
The downside to taking the lock is that the aio_set_fd_handler() thread
|
||||
has to wait until the epoll upgrade is finished, which involves many
|
||||
epoll_ctl(2) system calls. However, this scenario is rare and I couldn't
|
||||
think of another solution that is still simple.
|
||||
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Cc: Fam Zheng <fam@euphon.net>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230323144859.1338495-1-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e62da98527fa35fe5f532cded01a33edf9fbe7b2)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/fdmon-epoll.c | 25 ++++++++++++++++++-------
|
||||
1 file changed, 18 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
|
||||
index e11a8a022e..1683aa1105 100644
|
||||
--- a/util/fdmon-epoll.c
|
||||
+++ b/util/fdmon-epoll.c
|
||||
@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
|
||||
|
||||
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
|
||||
{
|
||||
+ bool ok;
|
||||
+
|
||||
if (ctx->epollfd < 0) {
|
||||
return false;
|
||||
}
|
||||
@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
|
||||
return false;
|
||||
}
|
||||
|
||||
- if (npfd >= EPOLL_ENABLE_THRESHOLD) {
|
||||
- if (fdmon_epoll_try_enable(ctx)) {
|
||||
- return true;
|
||||
- } else {
|
||||
- fdmon_epoll_disable(ctx);
|
||||
- }
|
||||
+ if (npfd < EPOLL_ENABLE_THRESHOLD) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* The list must not change while we add fds to epoll */
|
||||
+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ok = fdmon_epoll_try_enable(ctx);
|
||||
+
|
||||
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
|
||||
+
|
||||
+ if (!ok) {
|
||||
+ fdmon_epoll_disable(ctx);
|
||||
}
|
||||
- return false;
|
||||
+ return ok;
|
||||
}
|
||||
|
||||
void fdmon_epoll_setup(AioContext *ctx)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,50 +0,0 @@
|
||||
From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 11:03:52 2023 +0100
|
||||
|
||||
aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
The barrier comes after an atomic increment, so it is enough to use
|
||||
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index dd9a7f6461..da13357bb8 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
|
||||
/* Increment wait_->num_waiters before evaluating cond. */ \
|
||||
qatomic_inc(&wait_->num_waiters); \
|
||||
/* Paired with smp_mb in aio_wait_kick(). */ \
|
||||
- smp_mb(); \
|
||||
+ smp_mb__after_rmw(); \
|
||||
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
|
||||
while ((cond)) { \
|
||||
aio_poll(ctx_, true); \
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,55 @@
|
||||
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 50795ee051a342c681a9b45671c552fbd6274db8
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:13 2023 -0400
|
||||
|
||||
apic: disable reentrancy detection for apic-msi
|
||||
|
||||
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
|
||||
as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/apic.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
|
||||
index 20b5a94073..ac3d47d231 100644
|
||||
--- a/hw/intc/apic.c
|
||||
+++ b/hw/intc/apic.c
|
||||
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
|
||||
APIC_SPACE_SIZE);
|
||||
|
||||
+ /*
|
||||
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
|
||||
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
|
||||
+ * safe.
|
||||
+ */
|
||||
+ s->io_memory.disable_reentrancy_guard = true;
|
||||
+
|
||||
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
|
||||
local_apics[s->id] = s;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,231 @@
|
||||
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:07 2023 -0400
|
||||
|
||||
async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
|
||||
when creating new BHes. Then, the async API will toggle the guard
|
||||
before/after calling the BH call-back. This prevents bh->mmio reentrancy
|
||||
issues.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
|
||||
[thuth: Fix "line over 90 characters" checkpatch.pl error]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 7 +++++++
|
||||
include/block/aio.h | 18 ++++++++++++++++--
|
||||
include/qemu/main-loop.h | 7 +++++--
|
||||
tests/unit/ptimer-test-stubs.c | 3 ++-
|
||||
util/async.c | 18 +++++++++++++++++-
|
||||
util/main-loop.c | 6 ++++--
|
||||
util/trace-events | 1 +
|
||||
7 files changed, 52 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index 343120f2ef..a3e949f6b3 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
|
||||
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
|
||||
* LEGACY timer_new_ms() - create a timer
|
||||
* LEGACY qemu_bh_new() - create a BH
|
||||
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* LEGACY qemu_aio_wait() - run an event loop iteration
|
||||
|
||||
Since they implicitly work on the main loop they cannot be used in code that
|
||||
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
|
||||
* aio_set_event_notifier() - monitor an event notifier
|
||||
* aio_timer_new() - create a timer
|
||||
* aio_bh_new() - create a BH
|
||||
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* aio_poll() - run an event loop iteration
|
||||
|
||||
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
|
||||
+argument, which is used to check for and prevent re-entrancy problems. For
|
||||
+BHs associated with devices, the reentrancy-guard is contained in the
|
||||
+corresponding DeviceState and named "mem_reentrancy_guard".
|
||||
+
|
||||
The AioContext can be obtained from the IOThread using
|
||||
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
|
||||
Code that takes an AioContext argument works both in IOThreads or the main
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 543717f294..db6f23c619 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -23,6 +23,8 @@
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "block/graph-lock.h"
|
||||
+#include "hw/qdev-core.h"
|
||||
+
|
||||
|
||||
typedef struct BlockAIOCB BlockAIOCB;
|
||||
typedef void BlockCompletionFunc(void *opaque, int ret);
|
||||
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*
|
||||
* @name: A human-readable identifier for debugging purposes.
|
||||
+ * @reentrancy_guard: A guard set when entering a cb to prevent
|
||||
+ * device-reentrancy issues
|
||||
*/
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name);
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure
|
||||
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* string.
|
||||
*/
|
||||
#define aio_bh_new(ctx, cb, opaque) \
|
||||
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
|
||||
+
|
||||
+/**
|
||||
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
|
||||
+ * reentrancy_guard
|
||||
+ *
|
||||
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
|
||||
+ * string.
|
||||
+ */
|
||||
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
|
||||
index b3e54e00bc..68e70e61aa 100644
|
||||
--- a/include/qemu/main-loop.h
|
||||
+++ b/include/qemu/main-loop.h
|
||||
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
|
||||
|
||||
/* internal interfaces */
|
||||
|
||||
+#define qemu_bh_new_guarded(cb, opaque, guard) \
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
|
||||
#define qemu_bh_new(cb, opaque) \
|
||||
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
|
||||
enum {
|
||||
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
|
||||
index f2bfcede93..8c9407c560 100644
|
||||
--- a/tests/unit/ptimer-test-stubs.c
|
||||
+++ b/tests/unit/ptimer-test-stubs.c
|
||||
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
|
||||
return deadline;
|
||||
}
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh = g_new(QEMUBH, 1);
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 21016a1ac7..a9b528c370 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -65,6 +65,7 @@ struct QEMUBH {
|
||||
void *opaque;
|
||||
QSLIST_ENTRY(QEMUBH) next;
|
||||
unsigned flags;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
};
|
||||
|
||||
/* Called concurrently from any thread */
|
||||
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
|
||||
}
|
||||
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name)
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_new(QEMUBH, 1);
|
||||
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
.name = name,
|
||||
+ .reentrancy_guard = reentrancy_guard,
|
||||
};
|
||||
return bh;
|
||||
}
|
||||
|
||||
void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
+ bool last_engaged_in_io = false;
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
+ if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ trace_reentrant_aio(bh->ctx, bh->name);
|
||||
+ }
|
||||
+ bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
bh->cb(bh->opaque);
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
|
||||
diff --git a/util/main-loop.c b/util/main-loop.c
|
||||
index e180c85145..7022f02ef8 100644
|
||||
--- a/util/main-loop.c
|
||||
+++ b/util/main-loop.c
|
||||
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
|
||||
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
|
||||
+ reentrancy_guard);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/util/trace-events b/util/trace-events
|
||||
index 16f78d8fe5..3f7e766683 100644
|
||||
--- a/util/trace-events
|
||||
+++ b/util/trace-events
|
||||
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
|
||||
# async.c
|
||||
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
|
||||
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
|
||||
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,70 @@
|
||||
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Mon May 1 10:19:56 2023 -0400
|
||||
|
||||
async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
|
||||
Fix that by keeping a local copy of the re-entrancy guard pointer.
|
||||
|
||||
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
|
||||
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
util/async.c | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index a9b528c370..cd1a1815f9 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
bool last_engaged_in_io = false;
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
- if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ /* Make a copy of the guard-pointer as cb may free the bh */
|
||||
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
|
||||
+ if (reentrancy_guard) {
|
||||
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
|
||||
+ if (reentrancy_guard->engaged_in_io) {
|
||||
trace_reentrant_aio(bh->ctx, bh->name);
|
||||
}
|
||||
- bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ reentrancy_guard->engaged_in_io = true;
|
||||
}
|
||||
|
||||
bh->cb(bh->opaque);
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ if (reentrancy_guard) {
|
||||
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,66 +0,0 @@
|
||||
From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 6229438cca037d42f44a96d38feb15cb102a444f
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:43:52 2023 +0100
|
||||
|
||||
async: clarify usage of barriers in the polling case
|
||||
|
||||
Explain that aio_context_notifier_poll() relies on
|
||||
aio_notify_accept() to catch all the memory writes that were
|
||||
done before ctx->notified was set to true.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 37d3e6036d..e0846baf93 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx)
|
||||
qatomic_set(&ctx->notified, false);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
|
||||
- * in aio_notify.
|
||||
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
|
||||
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
|
||||
+ * with smp_wmb() in aio_notify.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque)
|
||||
EventNotifier *e = opaque;
|
||||
AioContext *ctx = container_of(e, AioContext, notifier);
|
||||
|
||||
+ /*
|
||||
+ * No need for load-acquire because we just want to kick the
|
||||
+ * event loop. aio_notify_accept() takes care of synchronizing
|
||||
+ * the event loop with the producers.
|
||||
+ */
|
||||
return qatomic_read(&ctx->notified);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,111 +0,0 @@
|
||||
From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 11/12] async: update documentation of the memory barriers
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:15:06 2023 +0100
|
||||
|
||||
async: update documentation of the memory barriers
|
||||
|
||||
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
|
||||
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
|
||||
is happening when the bottom half is enqueued in the bh_list; not
|
||||
when the flags are set. Update the documentation to match.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 33 +++++++++++++++++++--------------
|
||||
1 file changed, 19 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 63434ddae4..37d3e6036d 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
|
||||
unsigned old_flags;
|
||||
|
||||
/*
|
||||
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
|
||||
- * 1. idle & any writes needed by the callback are done before the
|
||||
- * locations are read in the aio_bh_poll.
|
||||
- * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
- * could be freed.
|
||||
+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
|
||||
+ * insertion starts after BH_PENDING is set.
|
||||
*/
|
||||
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
|
||||
+
|
||||
if (!(old_flags & BH_PENDING)) {
|
||||
+ /*
|
||||
+ * At this point the bottom half becomes visible to aio_bh_poll().
|
||||
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
|
||||
+ * aio_bh_poll(), ensuring that:
|
||||
+ * 1. any writes needed by the callback are visible from the callback
|
||||
+ * after aio_bh_dequeue() returns bh.
|
||||
+ * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
+ * could be freed.
|
||||
+ */
|
||||
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
|
||||
}
|
||||
|
||||
@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
|
||||
QSLIST_REMOVE_HEAD(head, next);
|
||||
|
||||
/*
|
||||
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
|
||||
- * barrier ensures that the callback sees all writes done by the scheduling
|
||||
- * thread. It also ensures that the scheduling thread sees the cleared
|
||||
- * flag before bh->cb has run, and thus will call aio_notify again if
|
||||
- * necessary.
|
||||
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
|
||||
+ * the removal finishes before BH_PENDING is reset.
|
||||
*/
|
||||
*flags = qatomic_fetch_and(&bh->flags,
|
||||
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
|
||||
@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx)
|
||||
BHListSlice *s;
|
||||
int ret = 0;
|
||||
|
||||
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
|
||||
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
|
||||
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
|
||||
|
||||
@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
|
||||
- * aio_notify_accept.
|
||||
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
|
||||
+ * smp_mb() in aio_notify_accept().
|
||||
*/
|
||||
smp_wmb();
|
||||
qatomic_set(&ctx->notified, true);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading ctx->notify_me. Pairs
|
||||
- * with smp_mb in aio_ctx_prepare or aio_poll.
|
||||
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
|
||||
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ctx->notify_me)) {
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,57 @@
|
||||
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
|
||||
iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:11 2023 -0400
|
||||
|
||||
bcm2835_property: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from bcm2835_property to
|
||||
bcm2835_mbox and back into bcm2835_property, mark iomem as
|
||||
reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/misc/bcm2835_property.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
|
||||
index 890ae7bae5..de056ea2df 100644
|
||||
--- a/hw/misc/bcm2835_property.c
|
||||
+++ b/hw/misc/bcm2835_property.c
|
||||
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
|
||||
|
||||
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
|
||||
TYPE_BCM2835_PROPERTY, 0x10);
|
||||
+
|
||||
+ /*
|
||||
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
|
||||
+ * iomem. As such, mark iomem as re-entrancy safe.
|
||||
+ */
|
||||
+ s->iomem.disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
|
||||
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,250 +0,0 @@
|
||||
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:06 +0100
|
||||
Subject: [PATCH 24/31] block: Call drain callbacks only once
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
We only need to call both the BlockDriver's callback and the parent
|
||||
callbacks when going from undrained to drained or vice versa. A second
|
||||
drain section doesn't make a difference for the driver or the parent,
|
||||
they weren't supposed to send new requests before and after the second
|
||||
drain.
|
||||
|
||||
One thing that gets in the way is the 'ignore_bds_parents' parameter in
|
||||
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
|
||||
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
|
||||
quiesce the parent through BdrvChildClass callbacks. If an additional
|
||||
drain section is started now, bs->quiesce_counter will be non-zero, but
|
||||
we would still need to quiesce the parent through BdrvChildClass in
|
||||
order to keep things consistent (and unquiesce it on the matching
|
||||
bdrv_drained_end(), even though the counter would not reach 0 yet as
|
||||
long as the bdrv_drain_all() section is still active).
|
||||
|
||||
Instead of keeping track of this, let's just get rid of the parameter.
|
||||
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
|
||||
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
|
||||
the root for each node, resulting in quadratic complexity. As it happens,
|
||||
calling the callbacks only once solves the same problem, so as of this
|
||||
patch, we'll still have O(n) complexity and ignore_bds_parents is not
|
||||
needed any more.
|
||||
|
||||
This patch only ignores the 'ignore_bds_parents' parameter. It will be
|
||||
removed in a separate patch.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 25 +++++++------------------
|
||||
block/io.c | 30 ++++++++++++++++++------------
|
||||
include/block/block_int-common.h | 8 ++++----
|
||||
tests/unit/test-bdrv-drain.c | 16 ++++++++++------
|
||||
4 files changed, 39 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index e0e3b21790..5a583e260d 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
{
|
||||
BlockDriverState *old_bs = child->bs;
|
||||
int new_bs_quiesce_counter;
|
||||
- int drain_saldo;
|
||||
|
||||
assert(!child->frozen);
|
||||
assert(old_bs != new_bs);
|
||||
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
|
||||
}
|
||||
|
||||
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
|
||||
-
|
||||
/*
|
||||
* If the new child node is drained but the old one was not, flush
|
||||
* all outstanding requests to the old child node.
|
||||
*/
|
||||
- while (drain_saldo > 0 && child->klass->drained_begin) {
|
||||
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
+ if (new_bs_quiesce_counter && !child->quiesced_parent) {
|
||||
bdrv_parent_drained_begin_single(child, true);
|
||||
- drain_saldo--;
|
||||
}
|
||||
|
||||
if (old_bs) {
|
||||
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
if (new_bs) {
|
||||
assert_bdrv_graph_writable(new_bs);
|
||||
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
|
||||
-
|
||||
- /*
|
||||
- * Polling in bdrv_parent_drained_begin_single() may have led to the new
|
||||
- * node's quiesce_counter having been decreased. Not a problem, we just
|
||||
- * need to recognize this here and then invoke drained_end appropriately
|
||||
- * more often.
|
||||
- */
|
||||
- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
|
||||
- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
|
||||
-
|
||||
if (child->klass->attach) {
|
||||
child->klass->attach(child);
|
||||
}
|
||||
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
/*
|
||||
* If the old child node was drained but the new one is not, allow
|
||||
* requests to come in only after the new node has been attached.
|
||||
+ *
|
||||
+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
|
||||
+ * polls, which could have changed the value.
|
||||
*/
|
||||
- while (drain_saldo < 0 && child->klass->drained_end) {
|
||||
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
+ if (!new_bs_quiesce_counter && child->quiesced_parent) {
|
||||
bdrv_parent_drained_end_single(child);
|
||||
- drain_saldo++;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 75224480d0..87d6f22ec4 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
- assert(c->parent_quiesce_counter > 0);
|
||||
- c->parent_quiesce_counter--;
|
||||
+ assert(c->quiesced_parent);
|
||||
+ c->quiesced_parent = false;
|
||||
+
|
||||
if (c->klass->drained_end) {
|
||||
c->klass->drained_end(c);
|
||||
}
|
||||
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
{
|
||||
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
IO_OR_GS_CODE();
|
||||
- c->parent_quiesce_counter++;
|
||||
+
|
||||
+ assert(!c->quiesced_parent);
|
||||
+ c->quiesced_parent = true;
|
||||
+
|
||||
if (c->klass->drained_begin) {
|
||||
c->klass->drained_begin(c);
|
||||
}
|
||||
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
aio_disable_external(bdrv_get_aio_context(bs));
|
||||
- }
|
||||
|
||||
- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
- bs->drv->bdrv_drain_begin(bs);
|
||||
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
+ bdrv_parent_drained_begin(bs, parent, false);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
- bs->drv->bdrv_drain_end(bs);
|
||||
- }
|
||||
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
-
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
if (old_quiesce_counter == 1) {
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
+ }
|
||||
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
+ bdrv_parent_drained_end(bs, parent, false);
|
||||
+
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 791dddfd7d..a6bc6b7fe9 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -980,13 +980,13 @@ struct BdrvChild {
|
||||
bool frozen;
|
||||
|
||||
/*
|
||||
- * How many times the parent of this child has been drained
|
||||
+ * True if the parent of this child has been drained by this BdrvChild
|
||||
* (through klass->drained_*).
|
||||
- * Usually, this is equal to bs->quiesce_counter (potentially
|
||||
- * reduced by bdrv_drain_all_count). It may differ while the
|
||||
+ *
|
||||
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
|
||||
* child is entering or leaving a drained section.
|
||||
*/
|
||||
- int parent_quiesce_counter;
|
||||
+ bool quiesced_parent;
|
||||
|
||||
QLIST_ENTRY(BdrvChild) next;
|
||||
QLIST_ENTRY(BdrvChild) next_parent;
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index dda08de8db..172bc6debc 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
|
||||
|
||||
do_drain_begin(drain_type, bs);
|
||||
|
||||
- g_assert_cmpint(bs->quiesce_counter, ==, 1);
|
||||
+ if (drain_type == BDRV_DRAIN_ALL) {
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 2);
|
||||
+ } else {
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
|
||||
+ }
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
|
||||
|
||||
do_drain_end(drain_type, bs);
|
||||
@@ -348,8 +352,8 @@ static void test_nested(void)
|
||||
|
||||
for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
|
||||
for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
|
||||
- int backing_quiesce = (outer != BDRV_DRAIN) +
|
||||
- (inner != BDRV_DRAIN);
|
||||
+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
|
||||
+ (inner == BDRV_DRAIN_ALL);
|
||||
|
||||
g_assert_cmpint(bs->quiesce_counter, ==, 0);
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
@@ -359,10 +363,10 @@ static void test_nested(void)
|
||||
do_drain_begin(outer, bs);
|
||||
do_drain_begin(inner, bs);
|
||||
|
||||
- g_assert_cmpint(bs->quiesce_counter, ==, 2);
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
|
||||
- g_assert_cmpint(s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
|
||||
+ g_assert_cmpint(s->drain_count, ==, 1);
|
||||
+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
|
||||
|
||||
do_drain_end(inner, bs);
|
||||
do_drain_end(outer, bs);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,354 @@
|
||||
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:16 +0200
|
||||
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When processing vectored guest requests that are not aligned to the
|
||||
storage request alignment, we pad them by adding head and/or tail
|
||||
buffers for a read-modify-write cycle.
|
||||
|
||||
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
|
||||
with this padding, the vector can exceed that limit. As of
|
||||
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
|
||||
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
|
||||
limit, instead returning an error to the guest.
|
||||
|
||||
To the guest, this appears as a random I/O error. We should not return
|
||||
an I/O error to the guest when it issued a perfectly valid request.
|
||||
|
||||
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
|
||||
longer than IOV_MAX, which generally seems to work (because the guest
|
||||
assumes a smaller alignment than we really have, file-posix's
|
||||
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
|
||||
so emulate the request, so that the IOV_MAX does not matter). However,
|
||||
that does not seem exactly great.
|
||||
|
||||
I see two ways to fix this problem:
|
||||
1. We split such long requests into two requests.
|
||||
2. We join some elements of the vector into new buffers to make it
|
||||
shorter.
|
||||
|
||||
I am wary of (1), because it seems like it may have unintended side
|
||||
effects.
|
||||
|
||||
(2) on the other hand seems relatively simple to implement, with
|
||||
hopefully few side effects, so this patch does that.
|
||||
|
||||
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
|
||||
is effectively replaced by the new function bdrv_create_padded_qiov(),
|
||||
which not only wraps the request IOV with padding head/tail, but also
|
||||
ensures that the resulting vector will not have more than IOV_MAX
|
||||
elements. Putting that functionality into qemu_iovec_init_extended() is
|
||||
infeasible because it requires allocating a bounce buffer; doing so
|
||||
would require many more parameters (buffer alignment, how to initialize
|
||||
the buffer, and out parameters like the buffer, its length, and the
|
||||
original elements), which is not reasonable.
|
||||
|
||||
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
|
||||
functionality into bdrv_create_padded_qiov() by using public
|
||||
qemu_iovec_* functions, so that is what this patch does.
|
||||
|
||||
Because bdrv_pad_request() was the only "serious" user of
|
||||
qemu_iovec_init_extended(), the next patch will remove the latter
|
||||
function, so the functionality is not implemented twice.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 151 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e267a85ab..4e8e90208b 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1439,6 +1439,14 @@ out:
|
||||
* @merge_reads is true for small requests,
|
||||
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
* head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ *
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
|
||||
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
|
||||
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
|
||||
+ * I/O vector elements so for read requests, the data can be copied back after
|
||||
+ * the read is done.
|
||||
*/
|
||||
typedef struct BdrvRequestPadding {
|
||||
uint8_t *buf;
|
||||
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
|
||||
size_t head;
|
||||
size_t tail;
|
||||
bool merge_reads;
|
||||
+ bool write;
|
||||
QEMUIOVector local_qiov;
|
||||
+
|
||||
+ uint8_t *collapse_bounce_buf;
|
||||
+ size_t collapse_len;
|
||||
+ QEMUIOVector pre_collapse_qiov;
|
||||
} BdrvRequestPadding;
|
||||
|
||||
static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad)
|
||||
{
|
||||
int64_t align = bs->bl.request_alignment;
|
||||
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
}
|
||||
|
||||
+ pad->write = write;
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1547,8 +1563,23 @@ zero_mem:
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+/**
|
||||
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
|
||||
+ */
|
||||
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
|
||||
{
|
||||
+ if (pad->collapse_bounce_buf) {
|
||||
+ if (!pad->write) {
|
||||
+ /*
|
||||
+ * If padding required elements in the vector to be collapsed into a
|
||||
+ * bounce buffer, copy the bounce buffer content back
|
||||
+ */
|
||||
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_vfree(pad->collapse_bounce_buf);
|
||||
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
|
||||
+ }
|
||||
if (pad->buf) {
|
||||
qemu_vfree(pad->buf);
|
||||
qemu_iovec_destroy(&pad->local_qiov);
|
||||
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
memset(pad, 0, sizeof(*pad));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
|
||||
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
|
||||
+ *
|
||||
+ * To ensure this, when necessary, the first two or three elements of @iov are
|
||||
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
|
||||
+ * bounce buffer in pad->local_qiov.
|
||||
+ *
|
||||
+ * After performing a read request, the data from the bounce buffer must be
|
||||
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
|
||||
+ */
|
||||
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ struct iovec *iov, int niov,
|
||||
+ size_t iov_offset, size_t bytes)
|
||||
+{
|
||||
+ int padded_niov, surplus_count, collapse_count;
|
||||
+
|
||||
+ /* Assert this invariant */
|
||||
+ assert(niov <= IOV_MAX);
|
||||
+
|
||||
+ /*
|
||||
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
|
||||
+ * to the guest is not ideal, but there is little else we can do. At least
|
||||
+ * this will practically never happen on 64-bit systems.
|
||||
+ */
|
||||
+ if (SIZE_MAX - pad->head < bytes ||
|
||||
+ SIZE_MAX - pad->head - bytes < pad->tail)
|
||||
+ {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Length of the resulting IOV if we just concatenated everything */
|
||||
+ padded_niov = !!pad->head + niov + !!pad->tail;
|
||||
+
|
||||
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
|
||||
+ * Instead, merge the first two or three elements of @iov to reduce the
|
||||
+ * number of vector elements as necessary.
|
||||
+ */
|
||||
+ if (padded_niov > IOV_MAX) {
|
||||
+ /*
|
||||
+ * Only head and tail can have led to the number of entries exceeding
|
||||
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
|
||||
+ * to reduce the number of elements by `surplus_count`, so we merge that
|
||||
+ * many elements plus one into one element.
|
||||
+ */
|
||||
+ surplus_count = padded_niov - IOV_MAX;
|
||||
+ assert(surplus_count <= !!pad->head + !!pad->tail);
|
||||
+ collapse_count = surplus_count + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
|
||||
+ * advance `iov` (and associated variables) by those elements.
|
||||
+ */
|
||||
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
|
||||
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
|
||||
+ collapse_count, iov_offset, SIZE_MAX);
|
||||
+ iov += collapse_count;
|
||||
+ iov_offset = 0;
|
||||
+ niov -= collapse_count;
|
||||
+ bytes -= pad->pre_collapse_qiov.size;
|
||||
+
|
||||
+ /*
|
||||
+ * Construct the bounce buffer to match the length of the to-collapse
|
||||
+ * vector elements, and for write requests, initialize it with the data
|
||||
+ * from those elements. Then add it to `pad->local_qiov`.
|
||||
+ */
|
||||
+ pad->collapse_len = pad->pre_collapse_qiov.size;
|
||||
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
|
||||
+ if (pad->write) {
|
||||
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bdrv_pad_request
|
||||
*
|
||||
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
* read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
* needed.
|
||||
*
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
|
||||
* - on function start they represent original request
|
||||
* - on failure or when padding is not needed they are unchanged
|
||||
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad, bool *padded,
|
||||
BdrvRequestFlags *flags)
|
||||
{
|
||||
int ret;
|
||||
+ struct iovec *sliced_iov;
|
||||
+ int sliced_niov;
|
||||
+ size_t sliced_head, sliced_tail;
|
||||
|
||||
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
|
||||
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
*padded = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
- *qiov, *qiov_offset, *bytes,
|
||||
- pad->buf + pad->buf_len - pad->tail,
|
||||
- pad->tail);
|
||||
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
|
||||
+ &sliced_head, &sliced_tail,
|
||||
+ &sliced_niov);
|
||||
+
|
||||
+ /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ assert(*bytes <= SIZE_MAX);
|
||||
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
+ sliced_head, *bytes);
|
||||
if (ret < 0) {
|
||||
- bdrv_padding_destroy(pad);
|
||||
+ bdrv_padding_finalize(pad);
|
||||
return ret;
|
||||
}
|
||||
*bytes += pad->head + pad->tail;
|
||||
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- NULL, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
|
||||
+ &pad, NULL, &flags);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
bs->bl.request_alignment,
|
||||
qiov, qiov_offset, flags);
|
||||
tracked_request_end(&req);
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
fail:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
/* This flag doesn't make sense for padding or zero writes */
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
|
||||
if (padding) {
|
||||
assert(!(flags & BDRV_REQ_NO_WAIT));
|
||||
bdrv_make_request_serialising(req, align);
|
||||
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
}
|
||||
|
||||
out:
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- &padded, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
|
||||
+ &pad, &padded, &flags);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
qiov, qiov_offset, flags);
|
||||
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,56 @@
|
||||
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:34 +0200
|
||||
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
|
||||
qmp_block_resize()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This QMP handler runs in a coroutine, so it must use the corresponding
|
||||
no_co_wrappers instead.
|
||||
|
||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
blockdev.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index d7b5c18f0a..eb509cf964 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
if (!blk) {
|
||||
return;
|
||||
}
|
||||
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
|
||||
bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
bdrv_co_unlock(bs);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,298 +0,0 @@
|
||||
From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:09 +0100
|
||||
Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
In order to make sure that bdrv_replace_child_noperm() doesn't have to
|
||||
poll any more, get rid of the bdrv_parent_drained_begin_single() call.
|
||||
|
||||
This is possible now because we can require that the parent is already
|
||||
drained through the child in question when the function is called and we
|
||||
don't call the parent drain callbacks more than once.
|
||||
|
||||
The additional drain calls needed in callers cause the test case to run
|
||||
its code in the drain handler too early (bdrv_attach_child() drains
|
||||
now), so modify it to only enable the code after the test setup has
|
||||
completed.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 103 ++++++++++++++++++++++++++++++-----
|
||||
block/io.c | 2 +-
|
||||
include/block/block-io.h | 8 +++
|
||||
tests/unit/test-bdrv-drain.c | 10 ++++
|
||||
4 files changed, 108 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index af31a94863..65588d313a 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
/* old_bs reference is transparently moved from @s to @s->child */
|
||||
+ if (!s->child->bs) {
|
||||
+ /*
|
||||
+ * The parents were undrained when removing old_bs from the child. New
|
||||
+ * requests can't have been made, though, because the child was empty.
|
||||
+ *
|
||||
+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid
|
||||
+ * undraining the parent in the first place. Once this is done, having
|
||||
+ * new_bs drained when calling bdrv_replace_child_tran() is not a
|
||||
+ * requirement any more.
|
||||
+ */
|
||||
+ bdrv_parent_drained_begin_single(s->child, false);
|
||||
+ assert(!bdrv_parent_drained_poll_single(s->child));
|
||||
+ }
|
||||
+ assert(s->child->quiesced_parent);
|
||||
bdrv_replace_child_noperm(s->child, s->old_bs);
|
||||
bdrv_unref(new_bs);
|
||||
}
|
||||
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
|
||||
*
|
||||
* Note: real unref of old_bs is done only on commit.
|
||||
*
|
||||
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
|
||||
+ * kept drained until the transaction is completed.
|
||||
+ *
|
||||
* The function doesn't update permissions, caller is responsible for this.
|
||||
*/
|
||||
static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
|
||||
Transaction *tran)
|
||||
{
|
||||
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
|
||||
+
|
||||
+ assert(child->quiesced_parent);
|
||||
+ assert(!new_bs || new_bs->quiesce_counter);
|
||||
+
|
||||
*s = (BdrvReplaceChildState) {
|
||||
.child = child,
|
||||
.old_bs = child->bs,
|
||||
@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||
return permissions[qapi_perm];
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Replaces the node that a BdrvChild points to without updating permissions.
|
||||
+ *
|
||||
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||
+ * @child.
|
||||
+ *
|
||||
+ * This function does not poll.
|
||||
+ */
|
||||
static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
BlockDriverState *new_bs)
|
||||
{
|
||||
@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
int new_bs_quiesce_counter;
|
||||
|
||||
assert(!child->frozen);
|
||||
+
|
||||
+ /*
|
||||
+ * If we want to change the BdrvChild to point to a drained node as its new
|
||||
+ * child->bs, we need to make sure that its new parent is drained, too. In
|
||||
+ * other words, either child->quiesced_parent must already be true or we must
|
||||
+ * be able to set it and keep the parent's quiesce_counter consistent with
|
||||
+ * that, but without polling or starting new requests (this function
|
||||
+ * guarantees that it doesn't poll, and starting new requests would be
|
||||
+ * against the invariants of drain sections).
|
||||
+ *
|
||||
+ * To keep things simple, we pick the first option (child->quiesced_parent
|
||||
+ * must already be true). We also generalise the rule a bit to make it
|
||||
+ * easier to verify in callers and more likely to be covered in test cases:
|
||||
+ * The parent must be quiesced through this child even if new_bs isn't
|
||||
+ * currently drained.
|
||||
+ *
|
||||
+ * The only exception is for callers that always pass new_bs == NULL. In
|
||||
+ * this case, we obviously never need to consider the case of a drained
|
||||
+ * new_bs, so we can keep the callers simpler by allowing them not to drain
|
||||
+ * the parent.
|
||||
+ */
|
||||
+ assert(!new_bs || child->quiesced_parent);
|
||||
assert(old_bs != new_bs);
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
|
||||
}
|
||||
|
||||
- /*
|
||||
- * If the new child node is drained but the old one was not, flush
|
||||
- * all outstanding requests to the old child node.
|
||||
- */
|
||||
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
- if (new_bs_quiesce_counter && !child->quiesced_parent) {
|
||||
- bdrv_parent_drained_begin_single(child, true);
|
||||
- }
|
||||
-
|
||||
if (old_bs) {
|
||||
if (child->klass->detach) {
|
||||
child->klass->detach(child);
|
||||
@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
}
|
||||
|
||||
/*
|
||||
- * If the old child node was drained but the new one is not, allow
|
||||
- * requests to come in only after the new node has been attached.
|
||||
- *
|
||||
- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
|
||||
- * polls, which could have changed the value.
|
||||
+ * If the parent was drained through this BdrvChild previously, but new_bs
|
||||
+ * is not drained, allow requests to come in only after the new node has
|
||||
+ * been attached.
|
||||
*/
|
||||
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
if (!new_bs_quiesce_counter && child->quiesced_parent) {
|
||||
@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
}
|
||||
|
||||
bdrv_ref(child_bs);
|
||||
+ /*
|
||||
+ * Let every new BdrvChild start with a drained parent. Inserting the child
|
||||
+ * in the graph with bdrv_replace_child_noperm() will undrain it if
|
||||
+ * @child_bs is not drained.
|
||||
+ *
|
||||
+ * The child was only just created and is not yet visible in global state
|
||||
+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
|
||||
+ * could have sent requests and polling is not necessary.
|
||||
+ *
|
||||
+ * Note that this means that the parent isn't fully drained yet, we only
|
||||
+ * stop new requests from coming in. This is fine, we don't care about the
|
||||
+ * old requests here, they are not for this child. If another place enters a
|
||||
+ * drain section for the same parent, but wants it to be fully quiesced, it
|
||||
+ * will not run most of the code in .drained_begin() again (which is not
|
||||
+ * a problem, we already did this), but it will still poll until the parent
|
||||
+ * is fully quiesced, so it will not be negatively affected either.
|
||||
+ */
|
||||
+ bdrv_parent_drained_begin_single(new_child, false);
|
||||
bdrv_replace_child_noperm(new_child, child_bs);
|
||||
|
||||
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
|
||||
@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
|
||||
}
|
||||
|
||||
if (child->bs) {
|
||||
+ BlockDriverState *bs = child->bs;
|
||||
+ bdrv_drained_begin(bs);
|
||||
bdrv_replace_child_tran(child, NULL, tran);
|
||||
+ bdrv_drained_end(bs);
|
||||
}
|
||||
|
||||
tran_add(tran, &bdrv_remove_child_drv, child);
|
||||
}
|
||||
|
||||
+static void undrain_on_clean_cb(void *opaque)
|
||||
+{
|
||||
+ bdrv_drained_end(opaque);
|
||||
+}
|
||||
+
|
||||
+static TransactionActionDrv undrain_on_clean = {
|
||||
+ .clean = undrain_on_clean_cb,
|
||||
+};
|
||||
+
|
||||
static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
BlockDriverState *to,
|
||||
bool auto_skip, Transaction *tran,
|
||||
@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
+ bdrv_drained_begin(from);
|
||||
+ bdrv_drained_begin(to);
|
||||
+ tran_add(tran, &undrain_on_clean, from);
|
||||
+ tran_add(tran, &undrain_on_clean, to);
|
||||
+
|
||||
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
|
||||
assert(c->bs == from);
|
||||
if (!should_update_child(c, to)) {
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 5e9150d92c..ae64830eac 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
|
||||
}
|
||||
}
|
||||
|
||||
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
|
||||
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
|
||||
{
|
||||
if (c->klass->drained_poll) {
|
||||
return c->klass->drained_poll(c);
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 8f5e75756a..65e6d2569b 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
||||
*/
|
||||
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
|
||||
+/**
|
||||
+ * bdrv_parent_drained_poll_single:
|
||||
+ *
|
||||
+ * Returns true if there is any pending activity to cease before @c can be
|
||||
+ * called quiesced, false otherwise.
|
||||
+ */
|
||||
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
|
||||
+
|
||||
/**
|
||||
* bdrv_parent_drained_end_single:
|
||||
*
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 172bc6debc..2686a8acee 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
|
||||
|
||||
|
||||
typedef struct BDRVReplaceTestState {
|
||||
+ bool setup_completed;
|
||||
bool was_drained;
|
||||
bool was_undrained;
|
||||
bool has_read;
|
||||
@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
+ if (!s->setup_completed) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (!s->drain_count) {
|
||||
s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
|
||||
bdrv_inc_in_flight(bs);
|
||||
@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
+ if (!s->setup_completed) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
g_assert(s->drain_count > 0);
|
||||
if (!--s->drain_count) {
|
||||
s->was_undrained = true;
|
||||
@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
|
||||
bdrv_ref(old_child_bs);
|
||||
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
|
||||
BDRV_CHILD_COW, &error_abort);
|
||||
+ parent_s->setup_completed = true;
|
||||
|
||||
for (i = 0; i < old_drain_count; i++) {
|
||||
bdrv_drained_begin(old_child_bs);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,54 +0,0 @@
|
||||
From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:03 +0100
|
||||
Subject: [PATCH 21/31] block: Don't use subtree drains in
|
||||
bdrv_drop_intermediate()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Instead of using a subtree drain from the top node (which also drains
|
||||
child nodes of base that we're not even interested in), use a normal
|
||||
drain for base, which automatically drains all of the parents, too.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-9-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index cb5e96b1cf..b3449a312e 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
bdrv_ref(top);
|
||||
- bdrv_subtree_drained_begin(top);
|
||||
+ bdrv_drained_begin(base);
|
||||
|
||||
if (!top->drv || !base->drv) {
|
||||
goto exit;
|
||||
@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- bdrv_subtree_drained_end(top);
|
||||
+ bdrv_drained_end(base);
|
||||
bdrv_unref(top);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,157 +0,0 @@
|
||||
From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:02 +0100
|
||||
Subject: [PATCH 20/31] block: Drain individual nodes during reopen
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_reopen() and friends use subtree drains as a lazy way of covering
|
||||
all the nodes they touch. Turns out that this lazy way is a lot more
|
||||
complicated than just draining the nodes individually, even without
|
||||
accounting for the additional complexity in the drain mechanism itself.
|
||||
|
||||
Simplify the code by switching to draining the individual nodes that are
|
||||
already managed in the BlockReopenQueue anyway.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-8-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 16 +++++++++-------
|
||||
block/replication.c | 6 ------
|
||||
blockdev.c | 13 -------------
|
||||
3 files changed, 9 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 46df410b07..cb5e96b1cf 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
* returns a pointer to bs_queue, which is either the newly allocated
|
||||
* bs_queue, or the existing bs_queue being used.
|
||||
*
|
||||
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
|
||||
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
|
||||
*
|
||||
* To be called with bs->aio_context locked.
|
||||
*/
|
||||
@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
int flags;
|
||||
QemuOpts *opts;
|
||||
|
||||
- /* Make sure that the caller remembered to use a drained section. This is
|
||||
- * important to avoid graph changes between the recursive queuing here and
|
||||
- * bdrv_reopen_multiple(). */
|
||||
- assert(bs->quiesce_counter > 0);
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
+ bdrv_drained_begin(bs);
|
||||
+
|
||||
if (bs_queue == NULL) {
|
||||
bs_queue = g_new0(BlockReopenQueue, 1);
|
||||
QTAILQ_INIT(bs_queue);
|
||||
@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
|
||||
if (bs_queue) {
|
||||
BlockReopenQueueEntry *bs_entry, *next;
|
||||
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
|
||||
+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
|
||||
+
|
||||
+ aio_context_acquire(ctx);
|
||||
+ bdrv_drained_end(bs_entry->state.bs);
|
||||
+ aio_context_release(ctx);
|
||||
+
|
||||
qobject_unref(bs_entry->state.explicit_options);
|
||||
qobject_unref(bs_entry->state.options);
|
||||
g_free(bs_entry);
|
||||
@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
aio_context_acquire(ctx);
|
||||
}
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/replication.c b/block/replication.c
|
||||
index f1eed25e43..c62f48a874 100644
|
||||
--- a/block/replication.c
|
||||
+++ b/block/replication.c
|
||||
@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
|
||||
s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
|
||||
}
|
||||
|
||||
- bdrv_subtree_drained_begin(hidden_disk->bs);
|
||||
- bdrv_subtree_drained_begin(secondary_disk->bs);
|
||||
-
|
||||
if (s->orig_hidden_read_only) {
|
||||
QDict *opts = qdict_new();
|
||||
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
|
||||
@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
|
||||
aio_context_acquire(ctx);
|
||||
}
|
||||
}
|
||||
-
|
||||
- bdrv_subtree_drained_end(hidden_disk->bs);
|
||||
- bdrv_subtree_drained_end(secondary_disk->bs);
|
||||
}
|
||||
|
||||
static void backup_job_cleanup(BlockDriverState *bs)
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 3f1dec6242..8ffb3d9537 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -3547,8 +3547,6 @@ fail:
|
||||
void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
{
|
||||
BlockReopenQueue *queue = NULL;
|
||||
- GSList *drained = NULL;
|
||||
- GSList *p;
|
||||
|
||||
/* Add each one of the BDS that we want to reopen to the queue */
|
||||
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
|
||||
@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
ctx = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(ctx);
|
||||
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
queue = bdrv_reopen_queue(queue, bs, qdict, false);
|
||||
- drained = g_slist_prepend(drained, bs);
|
||||
|
||||
aio_context_release(ctx);
|
||||
}
|
||||
@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
|
||||
fail:
|
||||
bdrv_reopen_queue_free(queue);
|
||||
- for (p = drained; p; p = p->next) {
|
||||
- BlockDriverState *bs = p->data;
|
||||
- AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
-
|
||||
- aio_context_acquire(ctx);
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
- aio_context_release(ctx);
|
||||
- }
|
||||
- g_slist_free(drained);
|
||||
}
|
||||
|
||||
void qmp_blockdev_del(const char *node_name, Error **errp)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,96 +0,0 @@
|
||||
From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:08 +0100
|
||||
Subject: [PATCH 26/31] block: Drop out of coroutine in
|
||||
bdrv_do_drained_begin_quiesce()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
The next patch adds a parent drain to bdrv_attach_child_common(), which
|
||||
shouldn't be, but is currently called from coroutines in some cases (e.g.
|
||||
.bdrv_co_create implementations generally open new nodes). Therefore,
|
||||
the assertion that we're not in a coroutine doesn't hold true any more.
|
||||
|
||||
We could just remove the assertion because there is nothing in the
|
||||
function that should be in conflict with running in a coroutine, but
|
||||
just to be on the safe side, we can reverse the caller relationship
|
||||
between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
|
||||
that the latter also just drops out of coroutine context and we can
|
||||
still be certain in the future that any drain code doesn't run in
|
||||
coroutines.
|
||||
|
||||
As a nice side effect, the structure of bdrv_do_drained_begin() is now
|
||||
symmetrical with bdrv_do_drained_end().
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-14-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/io.c | 25 ++++++++++++-------------
|
||||
1 file changed, 12 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e9503df6a..5e9150d92c 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool poll)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- assert(!qemu_in_coroutine());
|
||||
+
|
||||
+ if (qemu_in_coroutine()) {
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
bs->drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
}
|
||||
-}
|
||||
-
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool poll)
|
||||
-{
|
||||
- if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- bdrv_do_drained_begin_quiesce(bs, parent);
|
||||
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
}
|
||||
}
|
||||
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
+{
|
||||
+ bdrv_do_drained_begin(bs, parent, false);
|
||||
+}
|
||||
+
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,67 +0,0 @@
|
||||
From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:01 +0100
|
||||
Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Callers don't agree whether bdrv_reopen_queue_child() should be called
|
||||
with the AioContext lock held or not. Standardise on holding the lock
|
||||
(as done by QMP blockdev-reopen and the replication block driver) and
|
||||
fix bdrv_reopen() to do the same.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-7-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 7999fd08c5..46df410b07 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
* bs_queue, or the existing bs_queue being used.
|
||||
*
|
||||
* bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
|
||||
+ *
|
||||
+ * To be called with bs->aio_context locked.
|
||||
*/
|
||||
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
BlockDriverState *bs,
|
||||
@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
return bs_queue;
|
||||
}
|
||||
|
||||
+/* To be called with bs->aio_context locked */
|
||||
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
|
||||
BlockDriverState *bs,
|
||||
QDict *options, bool keep_old_opts)
|
||||
@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
bdrv_subtree_drained_begin(bs);
|
||||
+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
+
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
aio_context_release(ctx);
|
||||
}
|
||||
-
|
||||
- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
ret = bdrv_reopen_multiple(queue, errp);
|
||||
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,73 @@
|
||||
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 14 Jul 2023 10:59:38 +0200
|
||||
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
|
||||
which bdrv_check_qiov_request() does not guarantee.
|
||||
|
||||
bdrv_check_request32() however will guarantee this, and both of
|
||||
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
|
||||
bdrv_co_pwritev_part()) already run it before calling
|
||||
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
|
||||
bdrv_check_request32() without expecting error, too.
|
||||
|
||||
In effect, this patch will not change guest-visible behavior. It is a
|
||||
clean-up to tighten a condition to match what is guaranteed by our
|
||||
callers, and which exists purely to show clearly why the subsequent
|
||||
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
|
||||
|
||||
Note there is a difference between the interfaces of
|
||||
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
|
||||
an errp, the latter does not, so we can no longer just pass
|
||||
&error_abort. Instead, we need to check the returned value. While we
|
||||
do expect success (because the callers have already run this function),
|
||||
an assert(ret == 0) is not much simpler than just returning an error if
|
||||
it occurs, so let us handle errors by returning them up the stack now.
|
||||
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-id: 20230714085938.202730-1-hreitz@redhat.com
|
||||
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
|
||||
("block: Collapse padded I/O vecs exceeding IOV_MAX")
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/io.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 4e8e90208b..807c9fb720 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
int sliced_niov;
|
||||
size_t sliced_head, sliced_tail;
|
||||
|
||||
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
+ /* Should have been checked by the caller already */
|
||||
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
&sliced_head, &sliced_tail,
|
||||
&sliced_niov);
|
||||
|
||||
- /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ /* Guaranteed by bdrv_check_request32() */
|
||||
assert(*bytes <= SIZE_MAX);
|
||||
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
sliced_head, *bytes);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,132 +0,0 @@
|
||||
From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:53 +0200
|
||||
Subject: [PATCH 06/20] block: Improve empty format-specific info dump
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When a block driver supports obtaining format-specific information, but
|
||||
that object only contains optional fields, it is possible that none of
|
||||
them are present, so that dump_qobject() (called by
|
||||
bdrv_image_info_specific_dump()) will not print anything.
|
||||
|
||||
The callers of bdrv_image_info_specific_dump() put a header above this
|
||||
information ("Format specific information:\n"), which will look strange
|
||||
when there is nothing below. Modify bdrv_image_info_specific_dump() to
|
||||
print this header instead of its callers, and only if there is indeed
|
||||
something to be printed.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-2-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++----
|
||||
include/block/qapi.h | 3 ++-
|
||||
qemu-io-cmds.c | 4 ++--
|
||||
3 files changed, 41 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index cf557e3aea..51202b470a 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict)
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
|
||||
+/*
|
||||
+ * Return whether dumping the given QObject with dump_qobject() would
|
||||
+ * yield an empty dump, i.e. not print anything.
|
||||
+ */
|
||||
+static bool qobject_is_empty_dump(const QObject *obj)
|
||||
+{
|
||||
+ switch (qobject_type(obj)) {
|
||||
+ case QTYPE_QNUM:
|
||||
+ case QTYPE_QSTRING:
|
||||
+ case QTYPE_QBOOL:
|
||||
+ return false;
|
||||
+
|
||||
+ case QTYPE_QDICT:
|
||||
+ return qdict_size(qobject_to(QDict, obj)) == 0;
|
||||
+
|
||||
+ case QTYPE_QLIST:
|
||||
+ return qlist_empty(qobject_to(QList, obj));
|
||||
+
|
||||
+ default:
|
||||
+ abort();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * Dumps the given ImageInfoSpecific object in a human-readable form,
|
||||
+ * prepending an optional prefix if the dump is not empty.
|
||||
+ */
|
||||
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
+ const char *prefix)
|
||||
{
|
||||
QObject *obj, *data;
|
||||
Visitor *v = qobject_output_visitor_new(&obj);
|
||||
@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
|
||||
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
|
||||
visit_complete(v, &obj);
|
||||
data = qdict_get(qobject_to(QDict, obj), "data");
|
||||
- dump_qobject(1, data);
|
||||
+ if (!qobject_is_empty_dump(data)) {
|
||||
+ if (prefix) {
|
||||
+ qemu_printf("%s", prefix);
|
||||
+ }
|
||||
+ dump_qobject(1, data);
|
||||
+ }
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
}
|
||||
@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info)
|
||||
}
|
||||
|
||||
if (info->has_format_specific) {
|
||||
- qemu_printf("Format specific information:\n");
|
||||
- bdrv_image_info_specific_dump(info->format_specific);
|
||||
+ bdrv_image_info_specific_dump(info->format_specific,
|
||||
+ "Format specific information:\n");
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 22c7807c89..c09859ea78 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec);
|
||||
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
+ const char *prefix);
|
||||
void bdrv_image_info_dump(ImageInfo *info);
|
||||
#endif
|
||||
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
|
||||
index 952dc940f1..f4a374528e 100644
|
||||
--- a/qemu-io-cmds.c
|
||||
+++ b/qemu-io-cmds.c
|
||||
@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
|
||||
return -EIO;
|
||||
}
|
||||
if (spec_info) {
|
||||
- printf("Format specific information:\n");
|
||||
- bdrv_image_info_specific_dump(spec_info);
|
||||
+ bdrv_image_info_specific_dump(spec_info,
|
||||
+ "Format specific information:\n");
|
||||
qapi_free_ImageInfoSpecific(spec_info);
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,81 +0,0 @@
|
||||
From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:00 +0100
|
||||
Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_drain_invoke() now has two entirely separate cases that share no
|
||||
code any more and are selected depending on a bool parameter. Each case
|
||||
has only one caller. Just inline the function.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-6-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/io.c | 23 ++++++-----------------
|
||||
1 file changed, 6 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index f4ca62b034..a25103be6f 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -242,21 +242,6 @@ typedef struct {
|
||||
bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
|
||||
-{
|
||||
- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
- (!begin && !bs->drv->bdrv_drain_end)) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (begin) {
|
||||
- bs->drv->bdrv_drain_begin(bs);
|
||||
- } else {
|
||||
- bs->drv->bdrv_drain_end(bs);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
BdrvChild *ignore_parent, bool ignore_bds_parents)
|
||||
@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- bdrv_drain_invoke(bs, true);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- bdrv_drain_invoke(bs, false);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
+ }
|
||||
bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,433 +0,0 @@
|
||||
From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:40:59 +0100
|
||||
Subject: [PATCH 17/31] block: Remove drained_end_counter
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
drained_end_counter is unused now; nobody changes its value any more. It
|
||||
can be removed.
|
||||
|
||||
In cases where we had two almost identical functions that only differed
|
||||
in whether the caller passes drained_end_counter, or whether they would
|
||||
poll for a local drained_end_counter to reach 0, these become a single
|
||||
function.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221118174110.55183-5-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 5 +-
|
||||
block/block-backend.c | 4 +-
|
||||
block/io.c | 98 ++++++++------------------------
|
||||
blockjob.c | 2 +-
|
||||
include/block/block-io.h | 24 --------
|
||||
include/block/block_int-common.h | 6 +-
|
||||
6 files changed, 30 insertions(+), 109 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 16a62a329c..7999fd08c5 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
return bdrv_drain_poll(bs, false, NULL, false);
|
||||
}
|
||||
|
||||
-static void bdrv_child_cb_drained_end(BdrvChild *child,
|
||||
- int *drained_end_counter)
|
||||
+static void bdrv_child_cb_drained_end(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- bdrv_drained_end_no_poll(bs, drained_end_counter);
|
||||
+ bdrv_drained_end(bs);
|
||||
}
|
||||
|
||||
static int bdrv_child_cb_inactivate(BdrvChild *child)
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index d98a96ff37..feaf2181fa 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
|
||||
}
|
||||
static void blk_root_drained_begin(BdrvChild *child);
|
||||
static bool blk_root_drained_poll(BdrvChild *child);
|
||||
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
|
||||
+static void blk_root_drained_end(BdrvChild *child);
|
||||
|
||||
static void blk_root_change_media(BdrvChild *child, bool load);
|
||||
static void blk_root_resize(BdrvChild *child);
|
||||
@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
|
||||
return busy || !!blk->in_flight;
|
||||
}
|
||||
|
||||
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
|
||||
+static void blk_root_drained_end(BdrvChild *child)
|
||||
{
|
||||
BlockBackend *blk = child->opaque;
|
||||
assert(blk->quiesce_counter);
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index c2ed4b2af9..f4ca62b034 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
|
||||
- int *drained_end_counter)
|
||||
+void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
{
|
||||
+ IO_OR_GS_CODE();
|
||||
+
|
||||
assert(c->parent_quiesce_counter > 0);
|
||||
c->parent_quiesce_counter--;
|
||||
if (c->klass->drained_end) {
|
||||
- c->klass->drained_end(c, drained_end_counter);
|
||||
+ c->klass->drained_end(c);
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
-{
|
||||
- int drained_end_counter = 0;
|
||||
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
|
||||
- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
|
||||
-}
|
||||
-
|
||||
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents,
|
||||
- int *drained_end_counter)
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
BdrvChild *c;
|
||||
|
||||
@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
continue;
|
||||
}
|
||||
- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
|
||||
+ bdrv_parent_drained_end_single(c);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,12 +240,10 @@ typedef struct {
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
bool ignore_bds_parents;
|
||||
- int *drained_end_counter;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
|
||||
- int *drained_end_counter)
|
||||
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
|
||||
{
|
||||
if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
(!begin && !bs->drv->bdrv_drain_end)) {
|
||||
@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
BdrvChild *parent, bool ignore_bds_parents,
|
||||
bool poll);
|
||||
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- int *drained_end_counter);
|
||||
+ BdrvChild *parent, bool ignore_bds_parents);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- assert(!data->drained_end_counter);
|
||||
bdrv_do_drained_begin(bs, data->recursive, data->parent,
|
||||
data->ignore_bds_parents, data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
bdrv_do_drained_end(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents,
|
||||
- data->drained_end_counter);
|
||||
+ data->ignore_bds_parents);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
bool begin, bool recursive,
|
||||
BdrvChild *parent,
|
||||
bool ignore_bds_parents,
|
||||
- bool poll,
|
||||
- int *drained_end_counter)
|
||||
+ bool poll)
|
||||
{
|
||||
BdrvCoDrainData data;
|
||||
Coroutine *self = qemu_coroutine_self();
|
||||
@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.parent = parent,
|
||||
.ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
- .drained_end_counter = drained_end_counter,
|
||||
};
|
||||
|
||||
if (bs) {
|
||||
@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- bdrv_drain_invoke(bs, true, NULL);
|
||||
+ bdrv_drain_invoke(bs, true);
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
|
||||
- poll, NULL);
|
||||
+ poll);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
- * functions. The *drained_end_counter pointee will be incremented
|
||||
- * once for every background operation scheduled, and decremented once
|
||||
- * the operation settles. Therefore, the pointer must remain valid
|
||||
- * until the pointee reaches 0. That implies that whoever sets up the
|
||||
- * pointee has to poll until it is 0.
|
||||
- *
|
||||
- * We use atomic operations to access *drained_end_counter, because
|
||||
- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
|
||||
- * @bs may contain nodes in different AioContexts,
|
||||
- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
|
||||
- * regardless of which AioContext they are in.
|
||||
+ * functions.
|
||||
*/
|
||||
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- int *drained_end_counter)
|
||||
+ BdrvChild *parent, bool ignore_bds_parents)
|
||||
{
|
||||
BdrvChild *child;
|
||||
int old_quiesce_counter;
|
||||
|
||||
- assert(drained_end_counter != NULL);
|
||||
-
|
||||
if (qemu_in_coroutine()) {
|
||||
bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
|
||||
- false, drained_end_counter);
|
||||
+ false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- bdrv_drain_invoke(bs, false, drained_end_counter);
|
||||
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
|
||||
- drained_end_counter);
|
||||
+ bdrv_drain_invoke(bs, false);
|
||||
+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
if (old_quiesce_counter == 1) {
|
||||
@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
assert(!ignore_bds_parents);
|
||||
bs->recursive_quiesce_counter--;
|
||||
QLIST_FOREACH(child, &bs->children, next) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
|
||||
- drained_end_counter);
|
||||
+ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
-}
|
||||
-
|
||||
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
|
||||
-{
|
||||
- IO_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_subtree_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
+ bdrv_do_drained_end(bs, true, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
|
||||
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
int i;
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, false,
|
||||
- &drained_end_counter);
|
||||
+ bdrv_do_drained_end(child->bs, true, child, false);
|
||||
}
|
||||
-
|
||||
- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void)
|
||||
|
||||
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
g_assert(bs->quiesce_counter > 0);
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, true);
|
||||
}
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
}
|
||||
|
||||
void bdrv_drain_all_end(void)
|
||||
{
|
||||
BlockDriverState *bs = NULL;
|
||||
- int drained_end_counter = 0;
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
/*
|
||||
@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
|
||||
- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
|
||||
-
|
||||
assert(bdrv_drain_all_count > 0);
|
||||
bdrv_drain_all_count--;
|
||||
}
|
||||
diff --git a/blockjob.c b/blockjob.c
|
||||
index f51d4e18f3..0ab721e139 100644
|
||||
--- a/blockjob.c
|
||||
+++ b/blockjob.c
|
||||
@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
|
||||
}
|
||||
}
|
||||
|
||||
-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
|
||||
+static void child_job_drained_end(BdrvChild *c)
|
||||
{
|
||||
BlockJob *job = c->opaque;
|
||||
job_resume(&job->job);
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index b099d7db45..054e964c9b 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||
int64_t bytes, BdrvRequestFlags read_flags,
|
||||
BdrvRequestFlags write_flags);
|
||||
|
||||
-/**
|
||||
- * bdrv_drained_end_no_poll:
|
||||
- *
|
||||
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
|
||||
- * actually become unquiesced. Therefore, no graph changes will occur
|
||||
- * with this function.
|
||||
- *
|
||||
- * *drained_end_counter is incremented for every background operation
|
||||
- * that is scheduled, and will be decremented for every operation once
|
||||
- * it settles. The caller must poll until it reaches 0. The counter
|
||||
- * should be accessed using atomic operations only.
|
||||
- */
|
||||
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
|
||||
-
|
||||
-
|
||||
/*
|
||||
* "I/O or GS" API functions. These functions can run without
|
||||
* the BQL, but only in one specific iothread/main loop.
|
||||
@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
* bdrv_parent_drained_end_single:
|
||||
*
|
||||
* End a quiesced section for the parent of @c.
|
||||
- *
|
||||
- * This polls @bs's AioContext until all scheduled sub-drained_ends
|
||||
- * have settled, which may result in graph changes.
|
||||
*/
|
||||
void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
|
||||
@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
* bdrv_drained_end:
|
||||
*
|
||||
* End a quiescent section started by bdrv_drained_begin().
|
||||
- *
|
||||
- * This polls @bs's AioContext until all scheduled sub-drained_ends
|
||||
- * have settled. On one hand, that may result in graph changes. On
|
||||
- * the other, this requires that the caller either runs in the main
|
||||
- * loop; or that all involved nodes (@bs and all of its parents) are
|
||||
- * in the caller's AioContext.
|
||||
*/
|
||||
void bdrv_drained_end(BlockDriverState *bs);
|
||||
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 40d646d1ed..2b97576f6d 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -939,15 +939,11 @@ struct BdrvChildClass {
|
||||
* These functions must not change the graph (and therefore also must not
|
||||
* call aio_poll(), which could change the graph indirectly).
|
||||
*
|
||||
- * If drained_end() schedules background operations, it must atomically
|
||||
- * increment *drained_end_counter for each such operation and atomically
|
||||
- * decrement it once the operation has settled.
|
||||
- *
|
||||
* Note that this can be nested. If drained_begin() was called twice, new
|
||||
* I/O is allowed only after drained_end() was called twice, too.
|
||||
*/
|
||||
void (*drained_begin)(BdrvChild *child);
|
||||
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
|
||||
+ void (*drained_end)(BdrvChild *child);
|
||||
|
||||
/*
|
||||
* Returns whether the parent has pending requests for the child. This
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,274 +0,0 @@
|
||||
From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:07 +0100
|
||||
Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from
|
||||
drain_begin/end.
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
ignore_bds_parents is now ignored during drain_begin and drain_end, so
|
||||
we can just remove it there. It is still a valid optimisation for
|
||||
drain_all in bdrv_drained_poll(), so leave it around there.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-13-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/io.c | 58 +++++++++++++++-------------------------
|
||||
include/block/block-io.h | 3 +--
|
||||
3 files changed, 24 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 5a583e260d..af31a94863 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
|
||||
static void bdrv_child_cb_drained_begin(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- bdrv_do_drained_begin_quiesce(bs, NULL, false);
|
||||
+ bdrv_do_drained_begin_quiesce(bs, NULL);
|
||||
}
|
||||
|
||||
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 87d6f22ec4..2e9503df6a 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
|
||||
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
|
||||
|
||||
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
|
||||
{
|
||||
BdrvChild *c, *next;
|
||||
|
||||
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
|
||||
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
+ if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
bdrv_parent_drained_begin_single(c, false);
|
||||
@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
|
||||
{
|
||||
BdrvChild *c;
|
||||
|
||||
QLIST_FOREACH(c, &bs->parents, next_parent) {
|
||||
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
+ if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
bdrv_parent_drained_end_single(c);
|
||||
@@ -242,7 +240,6 @@ typedef struct {
|
||||
bool begin;
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
- bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents, bool poll);
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents);
|
||||
+ bool poll);
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
|
||||
- data->poll);
|
||||
+ bdrv_do_drained_begin(bs, data->parent, data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
|
||||
+ bdrv_do_drained_end(bs, data->parent);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
bool begin,
|
||||
BdrvChild *parent,
|
||||
- bool ignore_bds_parents,
|
||||
bool poll)
|
||||
{
|
||||
BdrvCoDrainData data;
|
||||
@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.done = false,
|
||||
.begin = begin,
|
||||
.parent = parent,
|
||||
- .ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
};
|
||||
|
||||
@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
- BdrvChild *parent, bool ignore_bds_parents)
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
assert(!qemu_in_coroutine());
|
||||
@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
aio_disable_external(bdrv_get_aio_context(bs));
|
||||
-
|
||||
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
- bdrv_parent_drained_begin(bs, parent, false);
|
||||
+ bdrv_parent_drained_begin(bs, parent);
|
||||
if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
bs->drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents, bool poll)
|
||||
+ bool poll)
|
||||
{
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
return;
|
||||
}
|
||||
|
||||
- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
|
||||
+ bdrv_do_drained_begin_quiesce(bs, parent);
|
||||
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
* nodes.
|
||||
*/
|
||||
if (poll) {
|
||||
- assert(!ignore_bds_parents);
|
||||
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
|
||||
}
|
||||
}
|
||||
@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, NULL, false, true);
|
||||
+ bdrv_do_drained_begin(bs, NULL, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
* functions.
|
||||
*/
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
|
||||
{
|
||||
int old_quiesce_counter;
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
|
||||
+ bdrv_co_yield_to_drain(bs, false, parent, false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
bs->drv->bdrv_drain_end(bs);
|
||||
}
|
||||
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
- bdrv_parent_drained_end(bs, parent, false);
|
||||
-
|
||||
+ bdrv_parent_drained_end(bs, parent);
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
}
|
||||
@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, NULL, false);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, NULL, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_begin(bs, NULL, true, false);
|
||||
+ bdrv_do_drained_begin(bs, NULL, false);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 9c36a16a1f..8f5e75756a 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
|
||||
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
|
||||
* running requests to complete.
|
||||
*/
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
- BdrvChild *parent, bool ignore_bds_parents);
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
|
||||
|
||||
/**
|
||||
* bdrv_drained_end:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,106 +0,0 @@
|
||||
From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:10 +0100
|
||||
Subject: [PATCH 28/31] block: Remove poll parameter from
|
||||
bdrv_parent_drained_begin_single()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
All callers of bdrv_parent_drained_begin_single() pass poll=false now,
|
||||
so we don't need the parameter any more.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-16-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 ++--
|
||||
block/io.c | 8 ++------
|
||||
include/block/block-io.h | 5 ++---
|
||||
3 files changed, 6 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 65588d313a..0d78711416 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
|
||||
* new_bs drained when calling bdrv_replace_child_tran() is not a
|
||||
* requirement any more.
|
||||
*/
|
||||
- bdrv_parent_drained_begin_single(s->child, false);
|
||||
+ bdrv_parent_drained_begin_single(s->child);
|
||||
assert(!bdrv_parent_drained_poll_single(s->child));
|
||||
}
|
||||
assert(s->child->quiesced_parent);
|
||||
@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
* a problem, we already did this), but it will still poll until the parent
|
||||
* is fully quiesced, so it will not be negatively affected either.
|
||||
*/
|
||||
- bdrv_parent_drained_begin_single(new_child, false);
|
||||
+ bdrv_parent_drained_begin_single(new_child);
|
||||
bdrv_replace_child_noperm(new_child, child_bs);
|
||||
|
||||
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index ae64830eac..38e57d1f67 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
|
||||
if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
- bdrv_parent_drained_begin_single(c, false);
|
||||
+ bdrv_parent_drained_begin_single(c);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
|
||||
return busy;
|
||||
}
|
||||
|
||||
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
+void bdrv_parent_drained_begin_single(BdrvChild *c)
|
||||
{
|
||||
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
assert(!c->quiesced_parent);
|
||||
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
if (c->klass->drained_begin) {
|
||||
c->klass->drained_begin(c);
|
||||
}
|
||||
- if (poll) {
|
||||
- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
|
||||
- }
|
||||
}
|
||||
|
||||
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 65e6d2569b..92aaa7c1e9 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
||||
/**
|
||||
* bdrv_parent_drained_begin_single:
|
||||
*
|
||||
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
|
||||
- * any pending activity to cease.
|
||||
+ * Begin a quiesced section for the parent of @c.
|
||||
*/
|
||||
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
+void bdrv_parent_drained_begin_single(BdrvChild *c);
|
||||
|
||||
/**
|
||||
* bdrv_parent_drained_poll_single:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,896 +0,0 @@
|
||||
From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:05 +0100
|
||||
Subject: [PATCH 23/31] block: Remove subtree drains
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Subtree drains are not used any more. Remove them.
|
||||
|
||||
After this, BdrvChildClass.attach/detach() don't poll any more.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-11-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 20 +--
|
||||
block/io.c | 121 +++-----------
|
||||
include/block/block-io.h | 18 +--
|
||||
include/block/block_int-common.h | 1 -
|
||||
include/block/block_int-io.h | 12 --
|
||||
tests/unit/test-bdrv-drain.c | 261 ++-----------------------------
|
||||
6 files changed, 44 insertions(+), 389 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 5330e89903..e0e3b21790 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
|
||||
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- return bdrv_drain_poll(bs, false, NULL, false);
|
||||
+ return bdrv_drain_poll(bs, NULL, false);
|
||||
}
|
||||
|
||||
static void bdrv_child_cb_drained_end(BdrvChild *child)
|
||||
@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
|
||||
assert(!bs->file);
|
||||
bs->file = child;
|
||||
}
|
||||
-
|
||||
- bdrv_apply_subtree_drain(child, bs);
|
||||
}
|
||||
|
||||
static void bdrv_child_cb_detach(BdrvChild *child)
|
||||
@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
|
||||
bdrv_backing_detach(child);
|
||||
}
|
||||
|
||||
- bdrv_unapply_subtree_drain(child, bs);
|
||||
-
|
||||
assert_bdrv_graph_writable(bs);
|
||||
QLIST_REMOVE(child, next);
|
||||
if (child == bs->backing) {
|
||||
@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
}
|
||||
|
||||
if (old_bs) {
|
||||
- /* Detach first so that the recursive drain sections coming from @child
|
||||
- * are already gone and we only end the drain sections that came from
|
||||
- * elsewhere. */
|
||||
if (child->klass->detach) {
|
||||
child->klass->detach(child);
|
||||
}
|
||||
@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
|
||||
|
||||
/*
|
||||
- * Detaching the old node may have led to the new node's
|
||||
- * quiesce_counter having been decreased. Not a problem, we
|
||||
- * just need to recognize this here and then invoke
|
||||
- * drained_end appropriately more often.
|
||||
+ * Polling in bdrv_parent_drained_begin_single() may have led to the new
|
||||
+ * node's quiesce_counter having been decreased. Not a problem, we just
|
||||
+ * need to recognize this here and then invoke drained_end appropriately
|
||||
+ * more often.
|
||||
*/
|
||||
assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
|
||||
drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
|
||||
|
||||
- /* Attach only after starting new drained sections, so that recursive
|
||||
- * drain sections coming from @child don't get an extra .drained_begin
|
||||
- * callback. */
|
||||
if (child->klass->attach) {
|
||||
child->klass->attach(child);
|
||||
}
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index a25103be6f..75224480d0 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -236,17 +236,15 @@ typedef struct {
|
||||
BlockDriverState *bs;
|
||||
bool done;
|
||||
bool begin;
|
||||
- bool recursive;
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *ignore_parent, bool ignore_bds_parents)
|
||||
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
- BdrvChild *child, *next;
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
|
||||
@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
return true;
|
||||
}
|
||||
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
|
||||
- if (bdrv_drain_poll(child->bs, recursive, child, false)) {
|
||||
- return true;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-
|
||||
return false;
|
||||
}
|
||||
|
||||
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
|
||||
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
|
||||
BdrvChild *ignore_parent)
|
||||
{
|
||||
- return bdrv_drain_poll(bs, recursive, ignore_parent, false);
|
||||
+ return bdrv_drain_poll(bs, ignore_parent, false);
|
||||
}
|
||||
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- bool poll);
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents);
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents, bool poll);
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- bdrv_do_drained_begin(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents, data->poll);
|
||||
+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
|
||||
+ data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
- bdrv_do_drained_end(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents);
|
||||
+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
}
|
||||
|
||||
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
- bool begin, bool recursive,
|
||||
+ bool begin,
|
||||
BdrvChild *parent,
|
||||
bool ignore_bds_parents,
|
||||
bool poll)
|
||||
@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.bs = bs,
|
||||
.done = false,
|
||||
.begin = begin,
|
||||
- .recursive = recursive,
|
||||
.parent = parent,
|
||||
.ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- bool poll)
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents, bool poll)
|
||||
{
|
||||
- BdrvChild *child, *next;
|
||||
-
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
|
||||
- poll);
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
|
||||
return;
|
||||
}
|
||||
|
||||
bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
|
||||
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- bs->recursive_quiesce_counter++;
|
||||
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
|
||||
- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
|
||||
- false);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
*
|
||||
@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
*/
|
||||
if (poll) {
|
||||
assert(!ignore_bds_parents);
|
||||
- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
|
||||
+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
|
||||
}
|
||||
}
|
||||
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, false, NULL, false, true);
|
||||
-}
|
||||
-
|
||||
-void bdrv_subtree_drained_begin(BlockDriverState *bs)
|
||||
-{
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, true, NULL, false, true);
|
||||
+ bdrv_do_drained_begin(bs, NULL, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
* functions.
|
||||
*/
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents)
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
- BdrvChild *child;
|
||||
int old_quiesce_counter;
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
|
||||
- false);
|
||||
+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
if (old_quiesce_counter == 1) {
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
-
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- bs->recursive_quiesce_counter--;
|
||||
- QLIST_FOREACH(child, &bs->children, next) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
|
||||
- }
|
||||
- }
|
||||
}
|
||||
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false);
|
||||
-}
|
||||
-
|
||||
-void bdrv_subtree_drained_end(BlockDriverState *bs)
|
||||
-{
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, true, NULL, false);
|
||||
-}
|
||||
-
|
||||
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
-{
|
||||
- int i;
|
||||
- IO_OR_GS_CODE();
|
||||
-
|
||||
- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_begin(child->bs, true, child, false, true);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
|
||||
-{
|
||||
- int i;
|
||||
- IO_OR_GS_CODE();
|
||||
-
|
||||
- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, false);
|
||||
- }
|
||||
+ bdrv_do_drained_end(bs, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void)
|
||||
while ((bs = bdrv_next_all_states(bs))) {
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(aio_context);
|
||||
- result |= bdrv_drain_poll(bs, false, NULL, true);
|
||||
+ result |= bdrv_drain_poll(bs, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_begin(bs, false, NULL, true, false);
|
||||
+ bdrv_do_drained_begin(bs, NULL, true, false);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, false, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, false, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 054e964c9b..9c36a16a1f 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
/**
|
||||
* bdrv_drain_poll:
|
||||
*
|
||||
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
|
||||
- * and if @recursive is true its children as well (used for subtree drain).
|
||||
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
|
||||
*
|
||||
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
|
||||
* ignore the drain request because they will be drained separately (used for
|
||||
@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
*
|
||||
* This is part of bdrv_drained_begin.
|
||||
*/
|
||||
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *ignore_parent, bool ignore_bds_parents);
|
||||
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
|
||||
+ bool ignore_bds_parents);
|
||||
|
||||
/**
|
||||
* bdrv_drained_begin:
|
||||
@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
|
||||
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
BdrvChild *parent, bool ignore_bds_parents);
|
||||
|
||||
-/**
|
||||
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
|
||||
- * exclusive access to all child nodes as well.
|
||||
- */
|
||||
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
-
|
||||
/**
|
||||
* bdrv_drained_end:
|
||||
*
|
||||
@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
*/
|
||||
void bdrv_drained_end(BlockDriverState *bs);
|
||||
|
||||
-/**
|
||||
- * End a quiescent section started by bdrv_subtree_drained_begin().
|
||||
- */
|
||||
-void bdrv_subtree_drained_end(BlockDriverState *bs);
|
||||
-
|
||||
#endif /* BLOCK_IO_H */
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 2b97576f6d..791dddfd7d 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -1184,7 +1184,6 @@ struct BlockDriverState {
|
||||
|
||||
/* Accessed with atomic ops. */
|
||||
int quiesce_counter;
|
||||
- int recursive_quiesce_counter;
|
||||
|
||||
unsigned int write_gen; /* Current data generation */
|
||||
|
||||
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
|
||||
index 4b0b3e17ef..8bc061ebb8 100644
|
||||
--- a/include/block/block_int-io.h
|
||||
+++ b/include/block/block_int-io.h
|
||||
@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
|
||||
*/
|
||||
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
|
||||
|
||||
-
|
||||
-/*
|
||||
- * "I/O or GS" API functions. These functions can run without
|
||||
- * the BQL, but only in one specific iothread/main loop.
|
||||
- *
|
||||
- * See include/block/block-io.h for more information about
|
||||
- * the "I/O or GS" API.
|
||||
- */
|
||||
-
|
||||
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
|
||||
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
|
||||
-
|
||||
#endif /* BLOCK_INT_IO_H */
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 695519ee02..dda08de8db 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
|
||||
enum drain_type {
|
||||
BDRV_DRAIN_ALL,
|
||||
BDRV_DRAIN,
|
||||
- BDRV_SUBTREE_DRAIN,
|
||||
DRAIN_TYPE_MAX,
|
||||
};
|
||||
|
||||
@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
|
||||
switch (drain_type) {
|
||||
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
|
||||
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
|
||||
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
|
||||
default: g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
|
||||
switch (drain_type) {
|
||||
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
|
||||
case BDRV_DRAIN: bdrv_drained_end(bs); break;
|
||||
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
|
||||
default: g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void)
|
||||
test_drv_cb_common(BDRV_DRAIN, false);
|
||||
}
|
||||
|
||||
-static void test_drv_cb_drain_subtree(void)
|
||||
-{
|
||||
- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
|
||||
-}
|
||||
-
|
||||
static void test_drv_cb_co_drain_all(void)
|
||||
{
|
||||
call_in_coroutine(test_drv_cb_drain_all);
|
||||
@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void)
|
||||
call_in_coroutine(test_drv_cb_drain);
|
||||
}
|
||||
|
||||
-static void test_drv_cb_co_drain_subtree(void)
|
||||
-{
|
||||
- call_in_coroutine(test_drv_cb_drain_subtree);
|
||||
-}
|
||||
-
|
||||
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
|
||||
{
|
||||
BlockBackend *blk;
|
||||
@@ -332,11 +319,6 @@ static void test_quiesce_drain(void)
|
||||
test_quiesce_common(BDRV_DRAIN, false);
|
||||
}
|
||||
|
||||
-static void test_quiesce_drain_subtree(void)
|
||||
-{
|
||||
- test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
|
||||
-}
|
||||
-
|
||||
static void test_quiesce_co_drain_all(void)
|
||||
{
|
||||
call_in_coroutine(test_quiesce_drain_all);
|
||||
@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void)
|
||||
call_in_coroutine(test_quiesce_drain);
|
||||
}
|
||||
|
||||
-static void test_quiesce_co_drain_subtree(void)
|
||||
-{
|
||||
- call_in_coroutine(test_quiesce_drain_subtree);
|
||||
-}
|
||||
-
|
||||
static void test_nested(void)
|
||||
{
|
||||
BlockBackend *blk;
|
||||
@@ -402,158 +379,6 @@ static void test_nested(void)
|
||||
blk_unref(blk);
|
||||
}
|
||||
|
||||
-static void test_multiparent(void)
|
||||
-{
|
||||
- BlockBackend *blk_a, *blk_b;
|
||||
- BlockDriverState *bs_a, *bs_b, *backing;
|
||||
- BDRVTestState *a_s, *b_s, *backing_s;
|
||||
-
|
||||
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- a_s = bs_a->opaque;
|
||||
- blk_insert_bs(blk_a, bs_a, &error_abort);
|
||||
-
|
||||
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- b_s = bs_b->opaque;
|
||||
- blk_insert_bs(blk_b, bs_b, &error_abort);
|
||||
-
|
||||
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
|
||||
- backing_s = backing->opaque;
|
||||
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 1);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 2);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 1);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- bdrv_unref(backing);
|
||||
- bdrv_unref(bs_a);
|
||||
- bdrv_unref(bs_b);
|
||||
- blk_unref(blk_a);
|
||||
- blk_unref(blk_b);
|
||||
-}
|
||||
-
|
||||
-static void test_graph_change_drain_subtree(void)
|
||||
-{
|
||||
- BlockBackend *blk_a, *blk_b;
|
||||
- BlockDriverState *bs_a, *bs_b, *backing;
|
||||
- BDRVTestState *a_s, *b_s, *backing_s;
|
||||
-
|
||||
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- a_s = bs_a->opaque;
|
||||
- blk_insert_bs(blk_a, bs_a, &error_abort);
|
||||
-
|
||||
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- b_s = bs_b->opaque;
|
||||
- blk_insert_bs(blk_b, bs_b, &error_abort);
|
||||
-
|
||||
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
|
||||
- backing_s = backing->opaque;
|
||||
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 5);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, NULL, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 3);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 3);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 3);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 5);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- bdrv_unref(backing);
|
||||
- bdrv_unref(bs_a);
|
||||
- bdrv_unref(bs_b);
|
||||
- blk_unref(blk_a);
|
||||
- blk_unref(blk_b);
|
||||
-}
|
||||
-
|
||||
static void test_graph_change_drain_all(void)
|
||||
{
|
||||
BlockBackend *blk_a, *blk_b;
|
||||
@@ -773,12 +598,6 @@ static void test_iothread_drain(void)
|
||||
test_iothread_common(BDRV_DRAIN, 1);
|
||||
}
|
||||
|
||||
-static void test_iothread_drain_subtree(void)
|
||||
-{
|
||||
- test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
|
||||
- test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
|
||||
-}
|
||||
-
|
||||
|
||||
typedef struct TestBlockJob {
|
||||
BlockJob common;
|
||||
@@ -863,7 +682,6 @@ enum test_job_result {
|
||||
enum test_job_drain_node {
|
||||
TEST_JOB_DRAIN_SRC,
|
||||
TEST_JOB_DRAIN_SRC_CHILD,
|
||||
- TEST_JOB_DRAIN_SRC_PARENT,
|
||||
};
|
||||
|
||||
static void test_blockjob_common_drain_node(enum drain_type drain_type,
|
||||
@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
|
||||
case TEST_JOB_DRAIN_SRC_CHILD:
|
||||
drain_bs = src_backing;
|
||||
break;
|
||||
- case TEST_JOB_DRAIN_SRC_PARENT:
|
||||
- drain_bs = src_overlay;
|
||||
- break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
|
||||
TEST_JOB_DRAIN_SRC);
|
||||
test_blockjob_common_drain_node(drain_type, use_iothread, result,
|
||||
TEST_JOB_DRAIN_SRC_CHILD);
|
||||
- if (drain_type == BDRV_SUBTREE_DRAIN) {
|
||||
- test_blockjob_common_drain_node(drain_type, use_iothread, result,
|
||||
- TEST_JOB_DRAIN_SRC_PARENT);
|
||||
- }
|
||||
}
|
||||
|
||||
static void test_blockjob_drain_all(void)
|
||||
@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
|
||||
}
|
||||
|
||||
-static void test_blockjob_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_error_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
|
||||
@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
|
||||
}
|
||||
|
||||
-static void test_blockjob_error_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_iothread_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
|
||||
@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
|
||||
}
|
||||
|
||||
-static void test_blockjob_iothread_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_iothread_error_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
|
||||
@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
|
||||
}
|
||||
|
||||
-static void test_blockjob_iothread_error_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
|
||||
-}
|
||||
-
|
||||
|
||||
typedef struct BDRVTestTopState {
|
||||
BdrvChild *wait_child;
|
||||
@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
|
||||
bdrv_drain(child_bs);
|
||||
bdrv_unref(child_bs);
|
||||
break;
|
||||
- case BDRV_SUBTREE_DRAIN:
|
||||
- /* Would have to ref/unref bs here for !detach_instead_of_delete, but
|
||||
- * then the whole test becomes pointless because the graph changes
|
||||
- * don't occur during the drain any more. */
|
||||
- assert(detach_instead_of_delete);
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
- break;
|
||||
case BDRV_DRAIN_ALL:
|
||||
bdrv_drain_all_begin();
|
||||
bdrv_drain_all_end();
|
||||
@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void)
|
||||
do_test_delete_by_drain(true, BDRV_DRAIN);
|
||||
}
|
||||
|
||||
-static void test_detach_by_drain_subtree(void)
|
||||
-{
|
||||
- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
|
||||
-}
|
||||
-
|
||||
|
||||
struct detach_by_parent_data {
|
||||
BlockDriverState *parent_b;
|
||||
@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb)
|
||||
g_assert(acb != NULL);
|
||||
|
||||
/* Drain and check the expected result */
|
||||
- bdrv_subtree_drained_begin(parent_b);
|
||||
+ bdrv_drained_begin(parent_b);
|
||||
+ bdrv_drained_begin(a);
|
||||
+ bdrv_drained_begin(b);
|
||||
+ bdrv_drained_begin(c);
|
||||
|
||||
g_assert(detach_by_parent_data.child_c != NULL);
|
||||
|
||||
@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb)
|
||||
g_assert(QLIST_NEXT(child_a, next) == NULL);
|
||||
|
||||
g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
|
||||
+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
|
||||
g_assert_cmpint(a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(b->quiesce_counter, ==, 0);
|
||||
+ g_assert_cmpint(b->quiesce_counter, ==, 1);
|
||||
g_assert_cmpint(c->quiesce_counter, ==, 1);
|
||||
|
||||
- bdrv_subtree_drained_end(parent_b);
|
||||
+ bdrv_drained_end(parent_b);
|
||||
+ bdrv_drained_end(a);
|
||||
+ bdrv_drained_end(b);
|
||||
+ bdrv_drained_end(c);
|
||||
|
||||
bdrv_unref(parent_b);
|
||||
blk_unref(blk);
|
||||
@@ -2202,70 +1984,47 @@ int main(int argc, char **argv)
|
||||
|
||||
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
|
||||
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
|
||||
- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
|
||||
- test_drv_cb_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
|
||||
test_drv_cb_co_drain_all);
|
||||
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
|
||||
- g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
|
||||
- test_drv_cb_co_drain_subtree);
|
||||
-
|
||||
|
||||
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
|
||||
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
|
||||
- g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
|
||||
- test_quiesce_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
|
||||
test_quiesce_co_drain_all);
|
||||
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
|
||||
- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
|
||||
- test_quiesce_co_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/nested", test_nested);
|
||||
- g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
|
||||
|
||||
- g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
|
||||
- test_graph_change_drain_subtree);
|
||||
g_test_add_func("/bdrv-drain/graph-change/drain_all",
|
||||
test_graph_change_drain_all);
|
||||
|
||||
g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
|
||||
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
|
||||
- g_test_add_func("/bdrv-drain/iothread/drain_subtree",
|
||||
- test_iothread_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
|
||||
- test_blockjob_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
|
||||
test_blockjob_error_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/error/drain",
|
||||
test_blockjob_error_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
|
||||
- test_blockjob_error_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
|
||||
test_blockjob_iothread_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
|
||||
test_blockjob_iothread_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
|
||||
- test_blockjob_iothread_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
|
||||
test_blockjob_iothread_error_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
|
||||
test_blockjob_iothread_error_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
|
||||
- test_blockjob_iothread_error_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
|
||||
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
|
||||
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
|
||||
- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
|
||||
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
|
||||
g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,302 +0,0 @@
|
||||
From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:40:58 +0100
|
||||
Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to
|
||||
non-coroutine_fn
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Polling during bdrv_drained_end() can be problematic (and in the future,
|
||||
we may get cases for bdrv_drained_begin() where polling is forbidden,
|
||||
and we don't care about already in-flight requests, but just want to
|
||||
prevent new requests from arriving).
|
||||
|
||||
The .bdrv_drained_begin/end callbacks running in a coroutine is the only
|
||||
reason why we have to do this polling, so make them non-coroutine
|
||||
callbacks again. None of the callers actually yield any more.
|
||||
|
||||
This means that bdrv_drained_end() effectively doesn't poll any more,
|
||||
even if AIO_WAIT_WHILE() loops are still there (their condition is false
|
||||
from the beginning). This is generally not a problem, but in
|
||||
test-bdrv-drain, some additional explicit aio_poll() calls need to be
|
||||
added because the test case wants to verify the final state after BHs
|
||||
have executed.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 +--
|
||||
block/io.c | 49 +++++---------------------------
|
||||
block/qed.c | 6 ++--
|
||||
block/throttle.c | 8 +++---
|
||||
include/block/block_int-common.h | 10 ++++---
|
||||
tests/unit/test-bdrv-drain.c | 18 ++++++------
|
||||
6 files changed, 32 insertions(+), 63 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index ec184150a2..16a62a329c 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
|
||||
assert(is_power_of_2(bs->bl.request_alignment));
|
||||
|
||||
for (i = 0; i < bs->quiesce_counter; i++) {
|
||||
- if (drv->bdrv_co_drain_begin) {
|
||||
- drv->bdrv_co_drain_begin(bs);
|
||||
+ if (drv->bdrv_drain_begin) {
|
||||
+ drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index b9424024f9..c2ed4b2af9 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -252,55 +252,20 @@ typedef struct {
|
||||
int *drained_end_counter;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
|
||||
-{
|
||||
- BdrvCoDrainData *data = opaque;
|
||||
- BlockDriverState *bs = data->bs;
|
||||
-
|
||||
- if (data->begin) {
|
||||
- bs->drv->bdrv_co_drain_begin(bs);
|
||||
- } else {
|
||||
- bs->drv->bdrv_co_drain_end(bs);
|
||||
- }
|
||||
-
|
||||
- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
|
||||
- qatomic_mb_set(&data->done, true);
|
||||
- if (!data->begin) {
|
||||
- qatomic_dec(data->drained_end_counter);
|
||||
- }
|
||||
- bdrv_dec_in_flight(bs);
|
||||
-
|
||||
- g_free(data);
|
||||
-}
|
||||
-
|
||||
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
|
||||
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
|
||||
int *drained_end_counter)
|
||||
{
|
||||
- BdrvCoDrainData *data;
|
||||
-
|
||||
- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
|
||||
- (!begin && !bs->drv->bdrv_co_drain_end)) {
|
||||
+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
+ (!begin && !bs->drv->bdrv_drain_end)) {
|
||||
return;
|
||||
}
|
||||
|
||||
- data = g_new(BdrvCoDrainData, 1);
|
||||
- *data = (BdrvCoDrainData) {
|
||||
- .bs = bs,
|
||||
- .done = false,
|
||||
- .begin = begin,
|
||||
- .drained_end_counter = drained_end_counter,
|
||||
- };
|
||||
-
|
||||
- if (!begin) {
|
||||
- qatomic_inc(drained_end_counter);
|
||||
+ if (begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ } else {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
}
|
||||
-
|
||||
- /* Make sure the driver callback completes during the polling phase for
|
||||
- * drain_begin. */
|
||||
- bdrv_inc_in_flight(bs);
|
||||
- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
|
||||
- aio_co_schedule(bdrv_get_aio_context(bs), data->co);
|
||||
}
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 013f826c44..c2691a85b1 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
|
||||
assert(!s->allocating_write_reqs_plugged);
|
||||
if (s->allocating_acb != NULL) {
|
||||
/* Another allocating write came concurrently. This cannot happen
|
||||
- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
|
||||
+ * from bdrv_qed_drain_begin, but it can happen when the timer runs.
|
||||
*/
|
||||
qemu_co_mutex_unlock(&s->table_lock);
|
||||
return false;
|
||||
@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVQEDState *s = bs->opaque;
|
||||
|
||||
@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = {
|
||||
.bdrv_co_check = bdrv_qed_co_check,
|
||||
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
|
||||
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
|
||||
- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
|
||||
+ .bdrv_drain_begin = bdrv_qed_drain_begin,
|
||||
};
|
||||
|
||||
static void bdrv_qed_init(void)
|
||||
diff --git a/block/throttle.c b/block/throttle.c
|
||||
index 131eba3ab4..88851c84f4 100644
|
||||
--- a/block/throttle.c
|
||||
+++ b/block/throttle.c
|
||||
@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
|
||||
reopen_state->opaque = NULL;
|
||||
}
|
||||
|
||||
-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
|
||||
+static void throttle_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
ThrottleGroupMember *tgm = bs->opaque;
|
||||
if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
|
||||
@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
|
||||
+static void throttle_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
ThrottleGroupMember *tgm = bs->opaque;
|
||||
assert(tgm->io_limits_disabled);
|
||||
@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
|
||||
.bdrv_reopen_commit = throttle_reopen_commit,
|
||||
.bdrv_reopen_abort = throttle_reopen_abort,
|
||||
|
||||
- .bdrv_co_drain_begin = throttle_co_drain_begin,
|
||||
- .bdrv_co_drain_end = throttle_co_drain_end,
|
||||
+ .bdrv_drain_begin = throttle_drain_begin,
|
||||
+ .bdrv_drain_end = throttle_drain_end,
|
||||
|
||||
.is_filter = true,
|
||||
.strong_runtime_opts = throttle_strong_runtime_opts,
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 31ae91e56e..40d646d1ed 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -735,17 +735,19 @@ struct BlockDriver {
|
||||
void (*bdrv_io_unplug)(BlockDriverState *bs);
|
||||
|
||||
/**
|
||||
- * bdrv_co_drain_begin is called if implemented in the beginning of a
|
||||
+ * bdrv_drain_begin is called if implemented in the beginning of a
|
||||
* drain operation to drain and stop any internal sources of requests in
|
||||
* the driver.
|
||||
- * bdrv_co_drain_end is called if implemented at the end of the drain.
|
||||
+ * bdrv_drain_end is called if implemented at the end of the drain.
|
||||
*
|
||||
* They should be used by the driver to e.g. manage scheduled I/O
|
||||
* requests, or toggle an internal state. After the end of the drain new
|
||||
* requests will continue normally.
|
||||
+ *
|
||||
+ * Implementations of both functions must not call aio_poll().
|
||||
*/
|
||||
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
|
||||
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
|
||||
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
|
||||
+ void (*bdrv_drain_end)(BlockDriverState *bs);
|
||||
|
||||
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
|
||||
bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 24f34e24ad..695519ee02 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque)
|
||||
bdrv_dec_in_flight(bs);
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVTestState *s = bs->opaque;
|
||||
s->drain_count++;
|
||||
@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
|
||||
+static void bdrv_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVTestState *s = bs->opaque;
|
||||
s->drain_count--;
|
||||
@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = {
|
||||
.bdrv_close = bdrv_test_close,
|
||||
.bdrv_co_preadv = bdrv_test_co_preadv,
|
||||
|
||||
- .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
|
||||
- .bdrv_co_drain_end = bdrv_test_co_drain_end,
|
||||
+ .bdrv_drain_begin = bdrv_test_drain_begin,
|
||||
+ .bdrv_drain_end = bdrv_test_drain_end,
|
||||
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
|
||||
@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void)
|
||||
bdrv_drained_begin(bs_child);
|
||||
g_assert(!job_has_completed);
|
||||
bdrv_drained_end(bs_child);
|
||||
+ aio_poll(qemu_get_aio_context(), false);
|
||||
g_assert(job_has_completed);
|
||||
|
||||
bdrv_unref(bs_parents[0]);
|
||||
@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void)
|
||||
|
||||
g_assert(!job_has_completed);
|
||||
ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
|
||||
+ aio_poll(qemu_get_aio_context(), false);
|
||||
g_assert(ret == 0);
|
||||
g_assert(job_has_completed);
|
||||
|
||||
@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
|
||||
* .was_drained.
|
||||
* Increment .drain_count.
|
||||
*/
|
||||
-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
|
||||
* If .drain_count reaches 0 and the node has a backing file, issue a
|
||||
* read request.
|
||||
*/
|
||||
-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
|
||||
+static void bdrv_replace_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = {
|
||||
.bdrv_close = bdrv_replace_test_close,
|
||||
.bdrv_co_preadv = bdrv_replace_test_co_preadv,
|
||||
|
||||
- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
|
||||
- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
|
||||
+ .bdrv_drain_begin = bdrv_replace_test_drain_begin,
|
||||
+ .bdrv_drain_end = bdrv_replace_test_drain_end,
|
||||
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
};
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,246 +0,0 @@
|
||||
From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:56 +0200
|
||||
Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
ImageInfo sometimes contains flat information, and sometimes it does
|
||||
not. Split off a BlockNodeInfo struct, which only contains information
|
||||
about a single node and has no link to the backing image.
|
||||
|
||||
We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct,
|
||||
which has links to all child nodes, not just the backing node. It would
|
||||
be strange to base BlockGraphInfo on ImageInfo, because then this
|
||||
extended struct would have two links to the backing node (one in
|
||||
BlockGraphInfo as one of all the child links, and one in ImageInfo).
|
||||
|
||||
Furthermore, it is quite common to ignore the backing-image field
|
||||
altogether: bdrv_query_image_info() does not set it, and
|
||||
bdrv_image_info_dump() does not evaluate it. That signals that we
|
||||
should have different structs for describing a single node and one that
|
||||
has a link to the backing image.
|
||||
|
||||
Still, bdrv_query_image_info() and bdrv_image_info_dump() are not
|
||||
changed too much in this patch. Follow-up patches will handle them.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-5-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------
|
||||
include/block/qapi.h | 3 ++
|
||||
qapi/block-core.json | 24 +++++++++----
|
||||
3 files changed, 85 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index 51202b470a..e5022b4481 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
/**
|
||||
- * bdrv_query_image_info:
|
||||
- * @bs: block device to examine
|
||||
- * @p_info: location to store image information
|
||||
- * @errp: location to store error information
|
||||
- *
|
||||
- * Store "flat" image information in @p_info.
|
||||
- *
|
||||
- * "Flat" means it does *not* query backing image information,
|
||||
- * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
- * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
- * a backing image.
|
||||
- *
|
||||
- * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ * Helper function for other query info functions. Store information about @bs
|
||||
+ * in @info, setting @errp on error.
|
||||
*/
|
||||
-void bdrv_query_image_info(BlockDriverState *bs,
|
||||
- ImageInfo **p_info,
|
||||
- Error **errp)
|
||||
+static void bdrv_do_query_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo *info,
|
||||
+ Error **errp)
|
||||
{
|
||||
int64_t size;
|
||||
const char *backing_filename;
|
||||
BlockDriverInfo bdi;
|
||||
int ret;
|
||||
Error *err = NULL;
|
||||
- ImageInfo *info;
|
||||
|
||||
aio_context_acquire(bdrv_get_aio_context(bs));
|
||||
|
||||
@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
|
||||
bdrv_refresh_filename(bs);
|
||||
|
||||
- info = g_new0(ImageInfo, 1);
|
||||
info->filename = g_strdup(bs->filename);
|
||||
info->format = g_strdup(bdrv_get_format_name(bs));
|
||||
info->virtual_size = size;
|
||||
@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
info->format_specific = bdrv_get_specific_info(bs, &err);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
- qapi_free_ImageInfo(info);
|
||||
goto out;
|
||||
}
|
||||
info->has_format_specific = info->format_specific != NULL;
|
||||
@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
break;
|
||||
default:
|
||||
error_propagate(errp, err);
|
||||
- qapi_free_ImageInfo(info);
|
||||
goto out;
|
||||
}
|
||||
|
||||
- *p_info = info;
|
||||
-
|
||||
out:
|
||||
aio_context_release(bdrv_get_aio_context(bs));
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * bdrv_query_block_node_info:
|
||||
+ * @bs: block node to examine
|
||||
+ * @p_info: location to store node information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store image information about @bs in @p_info.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockNodeInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(BlockNodeInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, info, errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_BlockNodeInfo(info);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * bdrv_query_image_info:
|
||||
+ * @bs: block node to examine
|
||||
+ * @p_info: location to store image information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store "flat" image information in @p_info.
|
||||
+ *
|
||||
+ * "Flat" means it does *not* query backing image information,
|
||||
+ * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
+ * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
+ * a backing image.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_image_info(BlockDriverState *bs,
|
||||
+ ImageInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ImageInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(ImageInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_ImageInfo(info);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+}
|
||||
+
|
||||
/* @p_info will be set only on success. */
|
||||
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
|
||||
Error **errp)
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index c09859ea78..c7de4e3fa9 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
|
||||
SnapshotInfoList **p_list,
|
||||
Error **errp);
|
||||
+void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo **p_info,
|
||||
+ Error **errp);
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
Error **errp);
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 4b9365167f..7720da0498 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -251,7 +251,7 @@
|
||||
} }
|
||||
|
||||
##
|
||||
-# @ImageInfo:
|
||||
+# @BlockNodeInfo:
|
||||
#
|
||||
# Information about a QEMU image file
|
||||
#
|
||||
@@ -279,22 +279,34 @@
|
||||
#
|
||||
# @snapshots: list of VM snapshots
|
||||
#
|
||||
-# @backing-image: info of the backing image (since 1.6)
|
||||
-#
|
||||
# @format-specific: structure supplying additional format-specific
|
||||
# information (since 1.7)
|
||||
#
|
||||
-# Since: 1.3
|
||||
+# Since: 8.0
|
||||
##
|
||||
-{ 'struct': 'ImageInfo',
|
||||
+{ 'struct': 'BlockNodeInfo',
|
||||
'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
|
||||
'*actual-size': 'int', 'virtual-size': 'int',
|
||||
'*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
|
||||
'*backing-filename': 'str', '*full-backing-filename': 'str',
|
||||
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
|
||||
- '*backing-image': 'ImageInfo',
|
||||
'*format-specific': 'ImageInfoSpecific' } }
|
||||
|
||||
+##
|
||||
+# @ImageInfo:
|
||||
+#
|
||||
+# Information about a QEMU image file, and potentially its backing image
|
||||
+#
|
||||
+# @backing-image: info of the backing image
|
||||
+#
|
||||
+# Since: 1.3
|
||||
+##
|
||||
+{ 'struct': 'ImageInfo',
|
||||
+ 'base': 'BlockNodeInfo',
|
||||
+ 'data': {
|
||||
+ '*backing-image': 'ImageInfo'
|
||||
+ } }
|
||||
+
|
||||
##
|
||||
# @ImageCheck:
|
||||
#
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,386 @@
|
||||
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:33 +0200
|
||||
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
|
||||
context
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
These functions must not be called in coroutine context, because they
|
||||
need write access to the graph.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/crypto.c | 6 +++---
|
||||
block/parallels.c | 6 +++---
|
||||
block/qcow.c | 6 +++---
|
||||
block/qcow2.c | 14 +++++++-------
|
||||
block/qed.c | 6 +++---
|
||||
block/vdi.c | 6 +++---
|
||||
block/vhdx.c | 6 +++---
|
||||
block/vmdk.c | 18 +++++++++---------
|
||||
block/vpc.c | 6 +++---
|
||||
include/block/block-global-state.h | 3 ++-
|
||||
include/sysemu/block-backend-global-state.h | 5 ++++-
|
||||
12 files changed, 44 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index d79a52ca74..a48112f945 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index ca67289187..8fd3ad0054 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
|
||||
ret = 0;
|
||||
cleanup:
|
||||
qcrypto_block_free(crypto);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
fail:
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -730,7 +730,7 @@ fail:
|
||||
bdrv_co_delete_file_noerr(bs);
|
||||
}
|
||||
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_QCryptoBlockCreateOptions(create_opts);
|
||||
qobject_unref(cryptoopts);
|
||||
return ret;
|
||||
diff --git a/block/parallels.c b/block/parallels.c
|
||||
index 013684801a..b49c35929e 100644
|
||||
--- a/block/parallels.c
|
||||
+++ b/block/parallels.c
|
||||
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
|
||||
exit:
|
||||
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index 490e4f819e..a0c701f578 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
|
||||
g_free(tmp);
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(qcow_blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(qcow_blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
qcrypto_block_free(crypto);
|
||||
return ret;
|
||||
}
|
||||
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
fail:
|
||||
g_free(backing_fmt);
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 22084730f9..0b8beb8b47 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/*
|
||||
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
||||
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3949,8 +3949,8 @@ finish:
|
||||
}
|
||||
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 0705a7b4e2..aff2a2076e 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
|
||||
ret = 0; /* success */
|
||||
out:
|
||||
g_free(l1_table);
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vdi.c b/block/vdi.c
|
||||
index f2434d6153..08331d2dd7 100644
|
||||
--- a/block/vdi.c
|
||||
+++ b/block/vdi.c
|
||||
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs_file);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
g_free(bmap);
|
||||
return ret;
|
||||
}
|
||||
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
- bdrv_unref(bs_file);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/vhdx.c b/block/vhdx.c
|
||||
index 81420722a1..00777da91a 100644
|
||||
--- a/block/vhdx.c
|
||||
+++ b/block/vhdx.c
|
||||
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
|
||||
|
||||
ret = 0;
|
||||
delete_and_exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
g_free(creator);
|
||||
return ret;
|
||||
}
|
||||
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index f5f49018fe..01ca13c82b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2306,7 +2306,7 @@ exit:
|
||||
if (pbb) {
|
||||
*pbb = blk;
|
||||
} else {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
|
||||
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
|
||||
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
|
||||
blk_bs(backing)->drv->format_name);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
if (ret) {
|
||||
error_setg(errp, "Failed to read parent CID");
|
||||
goto exit;
|
||||
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
|
||||
blk_bs(extent_blk)->filename);
|
||||
created_size += cur_size;
|
||||
extent_idx++;
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
}
|
||||
|
||||
/* Check whether we got excess extents */
|
||||
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
|
||||
opaque, NULL);
|
||||
if (extent_blk) {
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
error_setg(errp, "List of extents contains unused extents");
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
|
||||
ret = 0;
|
||||
exit:
|
||||
if (blk) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
}
|
||||
g_free(desc);
|
||||
g_free(parent_desc_line);
|
||||
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
|
||||
errp)) {
|
||||
goto exit;
|
||||
}
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
exit:
|
||||
g_free(ext_filename);
|
||||
return blk;
|
||||
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
|
||||
return NULL;
|
||||
}
|
||||
blk_set_allow_write_beyond_eof(blk, true);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
|
||||
if (size != -1) {
|
||||
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
|
||||
if (ret) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index b89b0ff8e2..07ddda5b99 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
|
||||
}
|
||||
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 399200a9a3..cd4ea554bf 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
|
||||
bool quiet, Error **errp);
|
||||
|
||||
void bdrv_ref(BlockDriverState *bs);
|
||||
-void bdrv_unref(BlockDriverState *bs);
|
||||
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
|
||||
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
|
||||
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
|
||||
index 2b6d27db7c..fa83f9389c 100644
|
||||
--- a/include/sysemu/block-backend-global-state.h
|
||||
+++ b/include/sysemu/block-backend-global-state.h
|
||||
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
|
||||
|
||||
int blk_get_refcnt(BlockBackend *blk);
|
||||
void blk_ref(BlockBackend *blk);
|
||||
-void blk_unref(BlockBackend *blk);
|
||||
+
|
||||
+void no_coroutine_fn blk_unref(BlockBackend *blk);
|
||||
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
|
||||
+
|
||||
void blk_remove_all_bs(void);
|
||||
BlockBackend *blk_by_name(const char *name);
|
||||
BlockBackend *blk_next(BlockBackend *blk);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,74 @@
|
||||
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Wed, 26 Jul 2023 09:48:07 +0200
|
||||
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() in blkio_virtio_blk_common_open() is used to open the
|
||||
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
|
||||
the future eventually the unix socket.
|
||||
|
||||
In all these cases we cannot open the path in read-only mode,
|
||||
when the `read-only` option of blockdev is on, because the exchange
|
||||
of IOCTL commands for example will fail.
|
||||
|
||||
In order to open the device read-only, we have to use the `read-only`
|
||||
property of the libblkio driver as we already do in blkio_file_open().
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 21 ++++++++++++---------
|
||||
1 file changed, 12 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 3ea9841bd8..5a82c6cb1a 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
* layer through the "/dev/fdset/N" special path.
|
||||
*/
|
||||
if (fd_supported) {
|
||||
- int open_flags;
|
||||
-
|
||||
- if (flags & BDRV_O_RDWR) {
|
||||
- open_flags = O_RDWR;
|
||||
- } else {
|
||||
- open_flags = O_RDONLY;
|
||||
- }
|
||||
-
|
||||
- fd = qemu_open(path, open_flags, errp);
|
||||
+ /*
|
||||
+ * `path` can contain the path of a character device
|
||||
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
|
||||
+ *
|
||||
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
|
||||
+ * is not set in the open flags, because the exchange of IOCTL commands
|
||||
+ * for example will fail.
|
||||
+ *
|
||||
+ * In order to open the device read-only, we are using the `read-only`
|
||||
+ * property of the libblkio driver in blkio_file_open().
|
||||
+ */
|
||||
+ fd = qemu_open(path, O_RDWR, errp);
|
||||
if (fd < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,54 @@
|
||||
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 12:37:44 +0200
|
||||
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
|
||||
notifications enabled from the start, but from the next releases
|
||||
this is no longer the case, so we have to explicitly enable them.
|
||||
|
||||
In fact, the libblkio documentation says they could be disabled,
|
||||
so we should always enable them at the start if we want to be
|
||||
sure to get completion eventfd notifications:
|
||||
|
||||
By default, the driver might not generate completion events for
|
||||
requests so it is necessary to explicitly enable the completion
|
||||
file descriptor before use:
|
||||
|
||||
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
|
||||
|
||||
I discovered this while trying a development version of libblkio:
|
||||
the guest kernel hangs during boot, while probing the device.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index afcec359f2..3ea9841bd8 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
QLIST_INIT(&s->bounce_bufs);
|
||||
s->blkioq = blkio_get_queue(s->blkio, 0);
|
||||
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
|
||||
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
|
||||
|
||||
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
||||
return 0;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,67 @@
|
||||
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:19 +0200
|
||||
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
|
||||
setting fails
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() fails if called with a unix domain socket in this way:
|
||||
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
|
||||
|
||||
Since virtio-blk-vhost-user does not support fd passing, let's always fall back
|
||||
on using `path` if we fail the fd passing.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 93a8f8fc5c..eef80e9ce5 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
* In order to open the device read-only, we are using the `read-only`
|
||||
* property of the libblkio driver in blkio_file_open().
|
||||
*/
|
||||
- fd = qemu_open(path, O_RDWR, errp);
|
||||
+ fd = qemu_open(path, O_RDWR, NULL);
|
||||
if (fd < 0) {
|
||||
- return -EINVAL;
|
||||
+ fd_supported = false;
|
||||
+ } else {
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ fd_supported = false;
|
||||
+ qemu_close(fd);
|
||||
+ }
|
||||
}
|
||||
+ }
|
||||
|
||||
- ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
- blkio_get_error_msg());
|
||||
- qemu_close(fd);
|
||||
- return ret;
|
||||
- }
|
||||
- } else {
|
||||
+ if (!fd_supported) {
|
||||
ret = blkio_set_str(s->blkio, "path", path);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,205 @@
|
||||
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 4 Jul 2023 14:34:36 +0200
|
||||
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
|
||||
RH-Bugzilla: 2213317
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
When QEMU is built with --enable-modules, the module_block.py script
|
||||
parses block/*.c to find block drivers that are built as modules. The
|
||||
script generates a table of block drivers called block_driver_modules[].
|
||||
This table is used for block driver module loading.
|
||||
|
||||
The blkio.c driver uses macros to define its BlockDriver structs. This
|
||||
was done to avoid code duplication but the module_block.py script is
|
||||
unable to parse the macro. The result is that libblkio-based block
|
||||
drivers can be built as modules but will not be found at runtime.
|
||||
|
||||
One fix is to make the module_block.py script or build system fancier so
|
||||
it can parse C macros (e.g. by parsing the preprocessed source code). I
|
||||
chose not to do this because it raises the complexity of the build,
|
||||
making future issues harder to debug.
|
||||
|
||||
Keep things simple: use the macro to avoid duplicating BlockDriver
|
||||
function pointers but define .format_name and .protocol_name manually
|
||||
for each BlockDriver. This way the module_block.py is able to parse the
|
||||
code.
|
||||
|
||||
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
|
||||
because module_block.py cannot parse them either.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230704123436.187761-1-stefanha@redhat.com
|
||||
Cc: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
|
||||
|
||||
Conflicts:
|
||||
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
|
||||
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
|
||||
1 file changed, 61 insertions(+), 57 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 6a6f20f923..afcec359f2 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -21,16 +21,6 @@
|
||||
|
||||
#include "block/block-io.h"
|
||||
|
||||
-/*
|
||||
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
|
||||
- * Using macros instead of typing out the string literals avoids typos.
|
||||
- */
|
||||
-#define DRIVER_IO_URING "io_uring"
|
||||
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
|
||||
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
|
||||
-
|
||||
/*
|
||||
* Allocated bounce buffers are kept in a list sorted by buffer address.
|
||||
*/
|
||||
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
|
||||
* - truncate
|
||||
*/
|
||||
|
||||
-#define BLKIO_DRIVER(name, ...) \
|
||||
- { \
|
||||
- .format_name = name, \
|
||||
- .protocol_name = name, \
|
||||
- .instance_size = sizeof(BDRVBlkioState), \
|
||||
- .bdrv_file_open = blkio_file_open, \
|
||||
- .bdrv_close = blkio_close, \
|
||||
- .bdrv_co_getlength = blkio_co_getlength, \
|
||||
- .bdrv_co_truncate = blkio_truncate, \
|
||||
- .bdrv_co_get_info = blkio_co_get_info, \
|
||||
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
- .bdrv_co_preadv = blkio_co_preadv, \
|
||||
- .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
- .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
- .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
- .bdrv_register_buf = blkio_register_buf, \
|
||||
- .bdrv_unregister_buf = blkio_unregister_buf, \
|
||||
- __VA_ARGS__ \
|
||||
- }
|
||||
-
|
||||
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_IO_URING,
|
||||
- .bdrv_needs_filename = true,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_NVME_IO_URING,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VFIO_PCI
|
||||
-);
|
||||
+/*
|
||||
+ * Do not include .format_name and .protocol_name because module_block.py
|
||||
+ * does not parse macros in the source code.
|
||||
+ */
|
||||
+#define BLKIO_DRIVER_COMMON \
|
||||
+ .instance_size = sizeof(BDRVBlkioState), \
|
||||
+ .bdrv_file_open = blkio_file_open, \
|
||||
+ .bdrv_close = blkio_close, \
|
||||
+ .bdrv_co_getlength = blkio_co_getlength, \
|
||||
+ .bdrv_co_truncate = blkio_truncate, \
|
||||
+ .bdrv_co_get_info = blkio_co_get_info, \
|
||||
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
+ .bdrv_co_preadv = blkio_co_preadv, \
|
||||
+ .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
+ .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
+ .bdrv_register_buf = blkio_register_buf, \
|
||||
+ .bdrv_unregister_buf = blkio_unregister_buf,
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_USER
|
||||
-);
|
||||
+/*
|
||||
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
|
||||
+ * consistency.
|
||||
+ */
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_VDPA
|
||||
-);
|
||||
+static BlockDriver bdrv_io_uring = {
|
||||
+ .format_name = "io_uring",
|
||||
+ .protocol_name = "io_uring",
|
||||
+ .bdrv_needs_filename = true,
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_nvme_io_uring = {
|
||||
+ .format_name = "nvme-io_uring",
|
||||
+ .protocol_name = "nvme-io_uring",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
|
||||
+ .format_name = "virtio-blk-vfio-pci",
|
||||
+ .protocol_name = "virtio-blk-vfio-pci",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_user = {
|
||||
+ .format_name = "virtio-blk-vhost-user",
|
||||
+ .protocol_name = "virtio-blk-vhost-user",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
|
||||
+ .format_name = "virtio-blk-vhost-vdpa",
|
||||
+ .protocol_name = "virtio-blk-vhost-vdpa",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
|
||||
static void bdrv_blkio_init(void)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,151 @@
|
||||
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:17 +0200
|
||||
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
|
||||
functions
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
This is in preparation for the next patch, where for virtio-blk
|
||||
drivers we need to handle the failure of blkio_connect().
|
||||
|
||||
Let's also rename the *_open() functions to *_connect() to make
|
||||
the code reflect the changes applied.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 40 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 5a82c6cb1a..85d1eed5fb 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *filename = qdict_get_str(options, "filename");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
- QDict *options, int flags, Error **errp)
|
||||
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
qdict_del(options, "path");
|
||||
|
||||
return 0;
|
||||
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
- ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else {
|
||||
- g_assert_not_reached();
|
||||
- }
|
||||
- if (ret < 0) {
|
||||
- blkio_destroy(&s->blkio);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_RDWR)) {
|
||||
ret = blkio_set_bool(s->blkio, "read-only", true);
|
||||
if (ret < 0) {
|
||||
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
- ret = blkio_connect(s->blkio);
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else {
|
||||
+ g_assert_not_reached();
|
||||
+ }
|
||||
if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
- blkio_get_error_msg());
|
||||
blkio_destroy(&s->blkio);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,85 @@
|
||||
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:18 +0200
|
||||
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
|
||||
`fd`
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
|
||||
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
|
||||
qemu_open() to support fd passing for virtio-blk") we are using
|
||||
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
|
||||
for all the virtio-blk-* drivers.
|
||||
|
||||
Unfortunately that property is also available for those drivers that do
|
||||
not support it, such as virtio-blk-vhost-user.
|
||||
|
||||
So, `blkio_get_int()` is not enough to check whether the driver supports
|
||||
the `fd` property or not. This is because the virtio-blk common libblkio
|
||||
driver only checks whether or not `fd` is set during `blkio_connect()`
|
||||
and fails with -EINVAL for those transports that do not support it
|
||||
(all except vhost-vdpa for now).
|
||||
|
||||
So let's handle the `blkio_connect()` failure, retrying it using `path`
|
||||
directly.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 29 +++++++++++++++++++++++++++++
|
||||
1 file changed, 29 insertions(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 85d1eed5fb..93a8f8fc5c 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
|
||||
ret = blkio_connect(s->blkio);
|
||||
+ /*
|
||||
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
|
||||
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
|
||||
+ * directly setting `path`.
|
||||
+ */
|
||||
+ if (fd_supported && ret == -EINVAL) {
|
||||
+ qemu_close(fd);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to clear the `fd` property we set previously by setting
|
||||
+ * it to -1.
|
||||
+ */
|
||||
+ ret = blkio_set_int(s->blkio, "fd", -1);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ }
|
||||
+
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
blkio_get_error_msg());
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,49 @@
|
||||
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:20 +0200
|
||||
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
|
||||
support
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Setting the `fd` property fails with virtio-blk-* libblkio drivers
|
||||
that do not support fd passing since
|
||||
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
|
||||
|
||||
Getting the `fd` property, on the other hand, always succeeds for
|
||||
virtio-blk-* libblkio drivers even when they don't support fd passing.
|
||||
|
||||
This patch switches to setting the `fd` property because it is a
|
||||
better mechanism for probing fd passing support than getting the `fd`
|
||||
property.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index eef80e9ce5..8defbf744f 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
|
||||
fd_supported = true;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,108 @@
|
||||
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 30 May 2023 09:19:40 +0200
|
||||
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
|
||||
virtio-blk
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
|
||||
RH-Bugzilla: 2180076
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd
|
||||
passing. Let's expose this to the user, so the management layer
|
||||
can pass the file descriptor of an already opened path.
|
||||
|
||||
If the libblkio virtio-blk driver supports fd passing, let's always
|
||||
use qemu_open() to open the `path`, so we can handle fd passing
|
||||
from the management layer through the "/dev/fdset/N" special path.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 44 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 0cdc99a729..6a6f20f923 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
- int ret;
|
||||
+ bool fd_supported = false;
|
||||
+ int fd, ret;
|
||||
|
||||
if (!path) {
|
||||
error_setg(errp, "missing 'path' option");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- ret = blkio_set_str(s->blkio, "path", path);
|
||||
- qdict_del(options, "path");
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
- blkio_get_error_msg());
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_NOCACHE)) {
|
||||
error_setg(errp, "cache.direct=off is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
+
|
||||
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ fd_supported = true;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
|
||||
+ * to open the `path`, so we can handle fd passing from the management
|
||||
+ * layer through the "/dev/fdset/N" special path.
|
||||
+ */
|
||||
+ if (fd_supported) {
|
||||
+ int open_flags;
|
||||
+
|
||||
+ if (flags & BDRV_O_RDWR) {
|
||||
+ open_flags = O_RDWR;
|
||||
+ } else {
|
||||
+ open_flags = O_RDONLY;
|
||||
+ }
|
||||
+
|
||||
+ fd = qemu_open(path, open_flags, errp);
|
||||
+ if (fd < 0) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ qemu_close(fd);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qdict_del(options, "path");
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,121 @@
|
||||
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 1 May 2023 13:34:43 -0400
|
||||
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
|
||||
default
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
reader_count() is a performance bottleneck because the global
|
||||
aio_context_list_lock mutex causes thread contention. Put this debugging
|
||||
assertion behind a new ./configure --enable-debug-graph-lock option and
|
||||
disable it by default.
|
||||
|
||||
The --enable-debug-graph-lock option is also enabled by the more general
|
||||
--enable-debug option.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 3 +++
|
||||
configure | 1 +
|
||||
meson.build | 2 ++
|
||||
meson_options.txt | 2 ++
|
||||
scripts/meson-buildoptions.sh | 4 ++++
|
||||
5 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 454c31e691..259a7a0bde 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
diff --git a/configure b/configure
|
||||
index 800b5850f4..a62a3e6be9 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -806,6 +806,7 @@ for opt do
|
||||
--enable-debug)
|
||||
# Enable debugging options that aren't excessively noisy
|
||||
debug_tcg="yes"
|
||||
+ meson_option_parse --enable-debug-graph-lock ""
|
||||
meson_option_parse --enable-debug-mutex ""
|
||||
meson_option_add -Doptimization=0
|
||||
fortify_source="no"
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..d964e741e7 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
|
||||
have_coroutine_pool = false
|
||||
endif
|
||||
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
|
||||
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
|
||||
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
|
||||
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
|
||||
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
|
||||
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
|
||||
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
|
||||
summary_info += {'malloc trim support': has_malloc_trim}
|
||||
summary_info += {'membarrier': have_membarrier}
|
||||
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
|
||||
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
|
||||
summary_info += {'mutex debugging': get_option('debug_mutex')}
|
||||
summary_info += {'memory allocator': get_option('malloc')}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..bc857fe68b 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
|
||||
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
|
||||
option('coroutine_pool', type: 'boolean', value: true,
|
||||
description: 'coroutine freelist (better performance)')
|
||||
+option('debug_graph_lock', type: 'boolean', value: false,
|
||||
+ description: 'graph lock debugging support')
|
||||
option('debug_mutex', type: 'boolean', value: false,
|
||||
description: 'mutex debugging support')
|
||||
option('debug_stack_usage', type: 'boolean', value: false,
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..30e1f25259 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -21,6 +21,8 @@ meson_options_help() {
|
||||
printf "%s\n" ' QEMU'
|
||||
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
|
||||
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
|
||||
+ printf "%s\n" ' --enable-debug-graph-lock'
|
||||
+ printf "%s\n" ' graph lock debugging support'
|
||||
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
|
||||
printf "%s\n" ' --enable-debug-stack-usage'
|
||||
printf "%s\n" ' measure coroutine stack usage'
|
||||
@@ -249,6 +251,8 @@ _meson_option_parse() {
|
||||
--datadir=*) quote_sh "-Ddatadir=$2" ;;
|
||||
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
|
||||
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
|
||||
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
|
||||
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
|
||||
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
|
||||
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
|
||||
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vladimir.sementsov-ogievskiy@openvz.org>
|
||||
Date: Mon, 7 Nov 2022 19:35:56 +0300
|
||||
Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Drop this simple wrapper used only in one place. We have too many graph
|
||||
modifying functions even without it.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 15 +--------------
|
||||
1 file changed, 1 insertion(+), 14 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index a18f052374..ec184150a2 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
BlockDriverState *new_bs);
|
||||
static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
|
||||
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
|
||||
- Transaction *tran);
|
||||
|
||||
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
|
||||
BlockReopenQueue *queue,
|
||||
@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
|
||||
tran_add(tran, &bdrv_remove_child_drv, child);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * A function to remove backing-chain child of @bs if exists: cow child for
|
||||
- * format nodes (always .backing) and filter child for filters (may be .file or
|
||||
- * .backing)
|
||||
- */
|
||||
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
|
||||
- Transaction *tran)
|
||||
-{
|
||||
- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
|
||||
-}
|
||||
-
|
||||
static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
BlockDriverState *to,
|
||||
bool auto_skip, Transaction *tran,
|
||||
@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
|
||||
}
|
||||
|
||||
if (detach_subchain) {
|
||||
- bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
|
||||
+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
|
||||
}
|
||||
|
||||
found = g_hash_table_new(NULL, NULL);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,145 +0,0 @@
|
||||
From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:54 +0200
|
||||
Subject: [PATCH 07/20] block/file: Add file-specific image info
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Add some (optional) information that the file driver can provide for
|
||||
image files, namely the extent size hint.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-3-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 30 ++++++++++++++++++++++++++++++
|
||||
qapi/block-core.json | 26 ++++++++++++++++++++++++--
|
||||
2 files changed, 54 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b9647c5ffc..df3da79aed 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1);
|
||||
+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
|
||||
+
|
||||
+ *spec_info = (ImageInfoSpecific){
|
||||
+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE,
|
||||
+ .u.file.data = file_info,
|
||||
+ };
|
||||
+
|
||||
+#ifdef FS_IOC_FSGETXATTR
|
||||
+ {
|
||||
+ BDRVRawState *s = bs->opaque;
|
||||
+ struct fsxattr attr;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr);
|
||||
+ if (!ret && attr.fsx_extsize != 0) {
|
||||
+ file_info->has_extent_size_hint = true;
|
||||
+ file_info->extent_size_hint = attr.fsx_extsize;
|
||||
+ }
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ return spec_info;
|
||||
+}
|
||||
+
|
||||
static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_getlength = raw_getlength,
|
||||
.bdrv_get_info = raw_get_info,
|
||||
+ .bdrv_get_specific_info = raw_get_specific_info,
|
||||
.bdrv_get_allocated_file_size
|
||||
= raw_get_allocated_file_size,
|
||||
.bdrv_get_specific_stats = raw_get_specific_stats,
|
||||
@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_getlength = raw_getlength,
|
||||
.bdrv_get_info = raw_get_info,
|
||||
+ .bdrv_get_specific_info = raw_get_specific_info,
|
||||
.bdrv_get_allocated_file_size
|
||||
= raw_get_allocated_file_size,
|
||||
.bdrv_get_specific_stats = hdev_get_specific_stats,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 95ac4fa634..f5d822cbd6 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -139,16 +139,29 @@
|
||||
'*encryption-format': 'RbdImageEncryptionFormat'
|
||||
} }
|
||||
|
||||
+##
|
||||
+# @ImageInfoSpecificFile:
|
||||
+#
|
||||
+# @extent-size-hint: Extent size hint (if available)
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'ImageInfoSpecificFile',
|
||||
+ 'data': {
|
||||
+ '*extent-size-hint': 'size'
|
||||
+ } }
|
||||
+
|
||||
##
|
||||
# @ImageInfoSpecificKind:
|
||||
#
|
||||
# @luks: Since 2.7
|
||||
# @rbd: Since 6.1
|
||||
+# @file: Since 8.0
|
||||
#
|
||||
# Since: 1.7
|
||||
##
|
||||
{ 'enum': 'ImageInfoSpecificKind',
|
||||
- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] }
|
||||
+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] }
|
||||
|
||||
##
|
||||
# @ImageInfoSpecificQCow2Wrapper:
|
||||
@@ -185,6 +198,14 @@
|
||||
{ 'struct': 'ImageInfoSpecificRbdWrapper',
|
||||
'data': { 'data': 'ImageInfoSpecificRbd' } }
|
||||
|
||||
+##
|
||||
+# @ImageInfoSpecificFileWrapper:
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'ImageInfoSpecificFileWrapper',
|
||||
+ 'data': { 'data': 'ImageInfoSpecificFile' } }
|
||||
+
|
||||
##
|
||||
# @ImageInfoSpecific:
|
||||
#
|
||||
@@ -199,7 +220,8 @@
|
||||
'qcow2': 'ImageInfoSpecificQCow2Wrapper',
|
||||
'vmdk': 'ImageInfoSpecificVmdkWrapper',
|
||||
'luks': 'ImageInfoSpecificLUKSWrapper',
|
||||
- 'rbd': 'ImageInfoSpecificRbdWrapper'
|
||||
+ 'rbd': 'ImageInfoSpecificRbdWrapper',
|
||||
+ 'file': 'ImageInfoSpecificFileWrapper'
|
||||
} }
|
||||
|
||||
##
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,206 +0,0 @@
|
||||
From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:27:00 +0200
|
||||
Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump()
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
In order to let qemu-img info present a block graph, add a parameter to
|
||||
bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the
|
||||
information of nodes below the root level can be given an indentation.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-9-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/monitor/block-hmp-cmds.c | 2 +-
|
||||
block/qapi.c | 47 +++++++++++++++++++---------------
|
||||
include/block/qapi.h | 5 ++--
|
||||
qemu-img.c | 2 +-
|
||||
qemu-io-cmds.c | 3 ++-
|
||||
5 files changed, 34 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index aa37faa601..72824d4e2e 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
|
||||
monitor_printf(mon, "\nImages:\n");
|
||||
image_info = inserted->image;
|
||||
while (1) {
|
||||
- bdrv_node_info_dump(qapi_ImageInfo_base(image_info));
|
||||
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0);
|
||||
if (image_info->has_backing_image) {
|
||||
image_info = image_info->backing_image;
|
||||
} else {
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index f208c21ccf..3e35603f0c 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj)
|
||||
* prepending an optional prefix if the dump is not empty.
|
||||
*/
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
- const char *prefix)
|
||||
+ const char *prefix,
|
||||
+ int indentation)
|
||||
{
|
||||
QObject *obj, *data;
|
||||
Visitor *v = qobject_output_visitor_new(&obj);
|
||||
@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
data = qdict_get(qobject_to(QDict, obj), "data");
|
||||
if (!qobject_is_empty_dump(data)) {
|
||||
if (prefix) {
|
||||
- qemu_printf("%s", prefix);
|
||||
+ qemu_printf("%*s%s", indentation * 4, "", prefix);
|
||||
}
|
||||
- dump_qobject(1, data);
|
||||
+ dump_qobject(indentation + 1, data);
|
||||
}
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
}
|
||||
|
||||
-void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation)
|
||||
{
|
||||
char *size_buf, *dsize_buf;
|
||||
+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
|
||||
+
|
||||
if (!info->has_actual_size) {
|
||||
dsize_buf = g_strdup("unavailable");
|
||||
} else {
|
||||
dsize_buf = size_to_str(info->actual_size);
|
||||
}
|
||||
size_buf = size_to_str(info->virtual_size);
|
||||
- qemu_printf("image: %s\n"
|
||||
- "file format: %s\n"
|
||||
- "virtual size: %s (%" PRId64 " bytes)\n"
|
||||
- "disk size: %s\n",
|
||||
- info->filename, info->format, size_buf,
|
||||
- info->virtual_size,
|
||||
- dsize_buf);
|
||||
+ qemu_printf("%simage: %s\n"
|
||||
+ "%sfile format: %s\n"
|
||||
+ "%svirtual size: %s (%" PRId64 " bytes)\n"
|
||||
+ "%sdisk size: %s\n",
|
||||
+ ind_s, info->filename,
|
||||
+ ind_s, info->format,
|
||||
+ ind_s, size_buf, info->virtual_size,
|
||||
+ ind_s, dsize_buf);
|
||||
g_free(size_buf);
|
||||
g_free(dsize_buf);
|
||||
|
||||
if (info->has_encrypted && info->encrypted) {
|
||||
- qemu_printf("encrypted: yes\n");
|
||||
+ qemu_printf("%sencrypted: yes\n", ind_s);
|
||||
}
|
||||
|
||||
if (info->has_cluster_size) {
|
||||
- qemu_printf("cluster_size: %" PRId64 "\n",
|
||||
- info->cluster_size);
|
||||
+ qemu_printf("%scluster_size: %" PRId64 "\n",
|
||||
+ ind_s, info->cluster_size);
|
||||
}
|
||||
|
||||
if (info->has_dirty_flag && info->dirty_flag) {
|
||||
- qemu_printf("cleanly shut down: no\n");
|
||||
+ qemu_printf("%scleanly shut down: no\n", ind_s);
|
||||
}
|
||||
|
||||
if (info->has_backing_filename) {
|
||||
- qemu_printf("backing file: %s", info->backing_filename);
|
||||
+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename);
|
||||
if (!info->has_full_backing_filename) {
|
||||
qemu_printf(" (cannot determine actual path)");
|
||||
} else if (strcmp(info->backing_filename,
|
||||
@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
}
|
||||
qemu_printf("\n");
|
||||
if (info->has_backing_filename_format) {
|
||||
- qemu_printf("backing file format: %s\n",
|
||||
- info->backing_filename_format);
|
||||
+ qemu_printf("%sbacking file format: %s\n",
|
||||
+ ind_s, info->backing_filename_format);
|
||||
}
|
||||
}
|
||||
|
||||
if (info->has_snapshots) {
|
||||
SnapshotInfoList *elem;
|
||||
|
||||
- qemu_printf("Snapshot list:\n");
|
||||
+ qemu_printf("%sSnapshot list:\n", ind_s);
|
||||
+ qemu_printf("%s", ind_s);
|
||||
bdrv_snapshot_dump(NULL);
|
||||
qemu_printf("\n");
|
||||
|
||||
@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
|
||||
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
|
||||
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
|
||||
+ qemu_printf("%s", ind_s);
|
||||
bdrv_snapshot_dump(&sn);
|
||||
qemu_printf("\n");
|
||||
}
|
||||
@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
|
||||
if (info->has_format_specific) {
|
||||
bdrv_image_info_specific_dump(info->format_specific,
|
||||
- "Format specific information:\n");
|
||||
+ "Format specific information:\n",
|
||||
+ indentation);
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 196436020e..38855f2ae9 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
- const char *prefix);
|
||||
-void bdrv_node_info_dump(BlockNodeInfo *info);
|
||||
+ const char *prefix,
|
||||
+ int indentation);
|
||||
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation);
|
||||
#endif
|
||||
diff --git a/qemu-img.c b/qemu-img.c
|
||||
index 3b2ca3bbcb..30b4ea58bb 100644
|
||||
--- a/qemu-img.c
|
||||
+++ b/qemu-img.c
|
||||
@@ -2859,7 +2859,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list)
|
||||
}
|
||||
delim = true;
|
||||
|
||||
- bdrv_node_info_dump(elem->value);
|
||||
+ bdrv_node_info_dump(elem->value, 0);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
|
||||
index f4a374528e..fdcb89211b 100644
|
||||
--- a/qemu-io-cmds.c
|
||||
+++ b/qemu-io-cmds.c
|
||||
@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
|
||||
}
|
||||
if (spec_info) {
|
||||
bdrv_image_info_specific_dump(spec_info,
|
||||
- "Format specific information:\n");
|
||||
+ "Format specific information:\n",
|
||||
+ 0);
|
||||
qapi_free_ImageInfoSpecific(spec_info);
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,155 +0,0 @@
|
||||
From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:59 +0200
|
||||
Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Introduce a new QAPI type BlockGraphInfo and an associated
|
||||
bdrv_query_block_graph_info() function that recursively gathers
|
||||
BlockNodeInfo objects through a block graph.
|
||||
|
||||
A follow-up patch is going to make "qemu-img info" use this to print
|
||||
information about all nodes that are (usually implicitly) opened for a
|
||||
given image file.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-8-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/block/qapi.h | 3 +++
|
||||
qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++
|
||||
3 files changed, 86 insertions(+)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index 5d0a8d2ce3..f208c21ccf 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -411,6 +411,54 @@ fail:
|
||||
qapi_free_ImageInfo(info);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * bdrv_query_block_graph_info:
|
||||
+ * @bs: root node to start from
|
||||
+ * @p_info: location to store image information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store image information about the graph starting from @bs in @p_info.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
+ BlockGraphInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockGraphInfo *info;
|
||||
+ BlockChildInfoList **children_list_tail;
|
||||
+ BdrvChild *c;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(BlockGraphInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ children_list_tail = &info->children;
|
||||
+
|
||||
+ QLIST_FOREACH(c, &bs->children, next) {
|
||||
+ BlockChildInfo *c_info;
|
||||
+
|
||||
+ c_info = g_new0(BlockChildInfo, 1);
|
||||
+ QAPI_LIST_APPEND(children_list_tail, c_info);
|
||||
+
|
||||
+ c_info->name = g_strdup(c->name);
|
||||
+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+ return;
|
||||
+
|
||||
+fail:
|
||||
+ assert(*errp != NULL);
|
||||
+ qapi_free_BlockGraphInfo(info);
|
||||
+}
|
||||
+
|
||||
/* @p_info will be set only on success. */
|
||||
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
|
||||
Error **errp)
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 2174bf8fa2..196436020e 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
bool flat,
|
||||
bool skip_implicit_filters,
|
||||
Error **errp);
|
||||
+void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
+ BlockGraphInfo **p_info,
|
||||
+ Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 4cf2deeb6c..d703e0fb16 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -307,6 +307,41 @@
|
||||
'*backing-image': 'ImageInfo'
|
||||
} }
|
||||
|
||||
+##
|
||||
+# @BlockChildInfo:
|
||||
+#
|
||||
+# Information about all nodes in the block graph starting at some node,
|
||||
+# annotated with information about that node in relation to its parent.
|
||||
+#
|
||||
+# @name: Child name of the root node in the BlockGraphInfo struct, in its role
|
||||
+# as the child of some undescribed parent node
|
||||
+#
|
||||
+# @info: Block graph information starting at this node
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'BlockChildInfo',
|
||||
+ 'data': {
|
||||
+ 'name': 'str',
|
||||
+ 'info': 'BlockGraphInfo'
|
||||
+ } }
|
||||
+
|
||||
+##
|
||||
+# @BlockGraphInfo:
|
||||
+#
|
||||
+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo
|
||||
+# data.
|
||||
+# The base BlockNodeInfo struct contains the information for the (sub)graph's
|
||||
+# root node.
|
||||
+#
|
||||
+# @children: Array of links to this node's child nodes' information
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'BlockGraphInfo',
|
||||
+ 'base': 'BlockNodeInfo',
|
||||
+ 'data': { 'children': ['BlockChildInfo'] } }
|
||||
+
|
||||
##
|
||||
# @ImageCheck:
|
||||
#
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,197 +0,0 @@
|
||||
From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:58 +0200
|
||||
Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
There is no real reason why bdrv_query_image_info() should generally not
|
||||
recurse. The ImageInfo struct has a pointer to the backing image, so it
|
||||
should generally be filled, unless the caller explicitly opts out.
|
||||
|
||||
This moves the recursing code from bdrv_block_device_info() into
|
||||
bdrv_query_image_info().
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-7-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58)
|
||||
|
||||
Conflicts:
|
||||
block/qapi.c: Conflicts with
|
||||
54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide
|
||||
redundant has_FOO in generated C"), which dropped
|
||||
`has_backing_image`. Without that commit (and 44ea9d9be before it),
|
||||
we still need to set `has_backing_image` in
|
||||
`bdrv_query_image_info()`.
|
||||
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 94 +++++++++++++++++++++++++++-----------------
|
||||
include/block/qapi.h | 2 +
|
||||
2 files changed, 59 insertions(+), 37 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index ad88bf9b38..5d0a8d2ce3 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
Error **errp)
|
||||
{
|
||||
ImageInfo **p_image_info;
|
||||
+ ImageInfo *backing_info;
|
||||
BlockDriverState *bs0, *backing;
|
||||
BlockDeviceInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
|
||||
if (!bs->drv) {
|
||||
error_setg(errp, "Block device %s is ejected", bs->node_name);
|
||||
@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
bs0 = bs;
|
||||
p_image_info = &info->image;
|
||||
info->backing_file_depth = 0;
|
||||
- while (1) {
|
||||
- Error *local_err = NULL;
|
||||
- bdrv_query_image_info(bs0, p_image_info, &local_err);
|
||||
- if (local_err) {
|
||||
- error_propagate(errp, local_err);
|
||||
- qapi_free_BlockDeviceInfo(info);
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
- /* stop gathering data for flat output */
|
||||
- if (flat) {
|
||||
- break;
|
||||
- }
|
||||
|
||||
- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
|
||||
- /*
|
||||
- * Put any filtered child here (for backwards compatibility to when
|
||||
- * we put bs0->backing here, which might be any filtered child).
|
||||
- */
|
||||
- info->backing_file_depth++;
|
||||
- bs0 = bdrv_filter_or_cow_bs(bs0);
|
||||
- (*p_image_info)->has_backing_image = true;
|
||||
- p_image_info = &((*p_image_info)->backing_image);
|
||||
- } else {
|
||||
- break;
|
||||
- }
|
||||
+ /*
|
||||
+ * Skip automatically inserted nodes that the user isn't aware of for
|
||||
+ * query-block (blk != NULL), but not for query-named-block-nodes
|
||||
+ */
|
||||
+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_BlockDeviceInfo(info);
|
||||
+ return NULL;
|
||||
+ }
|
||||
|
||||
- /* Skip automatically inserted nodes that the user isn't aware of for
|
||||
- * query-block (blk != NULL), but not for query-named-block-nodes */
|
||||
- if (blk) {
|
||||
- bs0 = bdrv_skip_implicit_filters(bs0);
|
||||
- }
|
||||
+ backing_info = info->image->backing_image;
|
||||
+ while (backing_info) {
|
||||
+ info->backing_file_depth++;
|
||||
+ backing_info = backing_info->backing_image;
|
||||
}
|
||||
|
||||
return info;
|
||||
@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
* bdrv_query_image_info:
|
||||
* @bs: block node to examine
|
||||
* @p_info: location to store image information
|
||||
+ * @flat: skip backing node information
|
||||
+ * @skip_implicit_filters: skip implicit filters in the backing chain
|
||||
* @errp: location to store error information
|
||||
*
|
||||
- * Store "flat" image information in @p_info.
|
||||
+ * Store image information in @p_info, potentially recursively covering the
|
||||
+ * backing chain.
|
||||
*
|
||||
- * "Flat" means it does *not* query backing image information,
|
||||
- * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
- * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
- * a backing image.
|
||||
+ * If @flat is true, do not query backing image information, i.e.
|
||||
+ * (*p_info)->has_backing_image will be set to false and
|
||||
+ * (*p_info)->backing_image to NULL even when the image does in fact have a
|
||||
+ * backing image.
|
||||
+ *
|
||||
+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain
|
||||
+ * will be skipped when querying backing image information.
|
||||
+ * (@skip_implicit_filters is ignored when @flat is true.)
|
||||
*
|
||||
* @p_info will be set only on success. On error, store error in @errp.
|
||||
*/
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
+ bool flat,
|
||||
+ bool skip_implicit_filters,
|
||||
Error **errp)
|
||||
{
|
||||
ImageInfo *info;
|
||||
@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
info = g_new0(ImageInfo, 1);
|
||||
bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
|
||||
if (*errp) {
|
||||
- qapi_free_ImageInfo(info);
|
||||
- return;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ if (!flat) {
|
||||
+ BlockDriverState *backing;
|
||||
+
|
||||
+ /*
|
||||
+ * Use any filtered child here (for backwards compatibility to when
|
||||
+ * we always took bs->backing, which might be any filtered child).
|
||||
+ */
|
||||
+ backing = bdrv_filter_or_cow_bs(bs);
|
||||
+ if (skip_implicit_filters) {
|
||||
+ backing = bdrv_skip_implicit_filters(backing);
|
||||
+ }
|
||||
+
|
||||
+ if (backing) {
|
||||
+ bdrv_query_image_info(backing, &info->backing_image, false,
|
||||
+ skip_implicit_filters, errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+ info->has_backing_image = true;
|
||||
+ }
|
||||
}
|
||||
|
||||
*p_info = info;
|
||||
+ return;
|
||||
+
|
||||
+fail:
|
||||
+ assert(*errp);
|
||||
+ qapi_free_ImageInfo(info);
|
||||
}
|
||||
|
||||
/* @p_info will be set only on success. */
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 22198dcd0c..2174bf8fa2 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
Error **errp);
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
+ bool flat,
|
||||
+ bool skip_implicit_filters,
|
||||
Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,99 +0,0 @@
|
||||
From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 14 Feb 2023 18:16:21 +0100
|
||||
Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in
|
||||
bdrv_append()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append()
|
||||
RH-Bugzilla: 2168209
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_append() is called with bs_top AioContext held, but
|
||||
bdrv_attach_child_noperm() could change the AioContext of bs_top.
|
||||
|
||||
bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from
|
||||
commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()").
|
||||
bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock
|
||||
is taken, so let's temporarily hold the new AioContext to prevent QEMU
|
||||
from failing in BDRV_POLL_WHILE when it tries to release the wrong
|
||||
AioContext.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209
|
||||
Reported-by: Aihua Liang <aliang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-Id: <20230214171621.11574-1-sgarzare@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 0d78711416..9e1dcb9e47 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
* child.
|
||||
*
|
||||
* This function does not create any image files.
|
||||
+ *
|
||||
+ * The caller must hold the AioContext lock for @bs_top.
|
||||
*/
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
Error **errp)
|
||||
@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
int ret;
|
||||
BdrvChild *child;
|
||||
Transaction *tran = tran_new();
|
||||
+ AioContext *old_context, *new_context = NULL;
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
assert(!bs_new->backing);
|
||||
|
||||
+ old_context = bdrv_get_aio_context(bs_top);
|
||||
+
|
||||
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
|
||||
&child_of_bds, bdrv_backing_role(bs_new),
|
||||
tran, errp);
|
||||
@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * bdrv_attach_child_noperm could change the AioContext of bs_top.
|
||||
+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
|
||||
+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
|
||||
+ * that assumes the new lock is taken.
|
||||
+ */
|
||||
+ new_context = bdrv_get_aio_context(bs_top);
|
||||
+
|
||||
+ if (old_context != new_context) {
|
||||
+ aio_context_release(old_context);
|
||||
+ aio_context_acquire(new_context);
|
||||
+ }
|
||||
+
|
||||
ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
@@ -5306,6 +5324,11 @@ out:
|
||||
|
||||
bdrv_refresh_limits(bs_top, NULL, NULL);
|
||||
|
||||
+ if (new_context && old_context != new_context) {
|
||||
+ aio_context_release(new_context);
|
||||
+ aio_context_acquire(old_context);
|
||||
+ }
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,140 +0,0 @@
|
||||
From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:55 +0200
|
||||
Subject: [PATCH 08/20] block/vmdk: Change extent info type
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
VMDK's implementation of .bdrv_get_specific_info() returns information
|
||||
about its extent files, ostensibly in the form of ImageInfo objects.
|
||||
However, it does not get this information through
|
||||
bdrv_query_image_info(), but fills only a select few fields with custom
|
||||
information that does not always match the fields' purposes.
|
||||
|
||||
For example, @format, which is supposed to be a block driver name, is
|
||||
filled with the extent type, e.g. SPARSE or FLAT.
|
||||
|
||||
In ImageInfo, @compressed shows whether the data that can be seen in the
|
||||
image is stored in compressed form or not. For example, a compressed
|
||||
qcow2 image will store compressed data in its data file, but when
|
||||
accessing the qcow2 node, you will see normal data. This is not how
|
||||
VMDK uses the @compressed field for its extent files: Instead, it
|
||||
signifies whether accessing the extent file will yield compressed data
|
||||
(which the VMDK driver then (de-)compresses).
|
||||
|
||||
Create a new structure to represent the extent information. This allows
|
||||
us to clarify the fields' meanings, and it clearly shows that these are
|
||||
not complete ImageInfo objects. (That is, if a user wants an extent
|
||||
file's ImageInfo object, they will need to query it separately, and will
|
||||
not get it from ImageInfoSpecificVmdk.extents.)
|
||||
|
||||
Note that this removes the last use of ['ImageInfo'] (i.e. an array of
|
||||
ImageInfo objects), so the QAPI generator will no longer generate
|
||||
ImageInfoList by default. However, we use it in qemu-img.c, so we need
|
||||
to create a dummy object to force the generator to create that type,
|
||||
similarly to DummyForceArrays in machine.json (introduced in commit
|
||||
9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array
|
||||
types")).
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-4-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/vmdk.c | 8 ++++----
|
||||
qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 41 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index 26376352b9..4435b9880b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
|
||||
return 1;
|
||||
}
|
||||
|
||||
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
|
||||
+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
|
||||
{
|
||||
- ImageInfo *info = g_new0(ImageInfo, 1);
|
||||
+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
|
||||
|
||||
bdrv_refresh_filename(extent->file->bs);
|
||||
- *info = (ImageInfo){
|
||||
+ *info = (VmdkExtentInfo){
|
||||
.filename = g_strdup(extent->file->bs->filename),
|
||||
.format = g_strdup(extent->type),
|
||||
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
|
||||
@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
|
||||
int i;
|
||||
BDRVVmdkState *s = bs->opaque;
|
||||
ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
|
||||
- ImageInfoList **tail;
|
||||
+ VmdkExtentInfoList **tail;
|
||||
|
||||
*spec_info = (ImageInfoSpecific){
|
||||
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index f5d822cbd6..4b9365167f 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -124,7 +124,33 @@
|
||||
'create-type': 'str',
|
||||
'cid': 'int',
|
||||
'parent-cid': 'int',
|
||||
- 'extents': ['ImageInfo']
|
||||
+ 'extents': ['VmdkExtentInfo']
|
||||
+ } }
|
||||
+
|
||||
+##
|
||||
+# @VmdkExtentInfo:
|
||||
+#
|
||||
+# Information about a VMDK extent file
|
||||
+#
|
||||
+# @filename: Name of the extent file
|
||||
+#
|
||||
+# @format: Extent type (e.g. FLAT or SPARSE)
|
||||
+#
|
||||
+# @virtual-size: Number of bytes covered by this extent
|
||||
+#
|
||||
+# @cluster-size: Cluster size in bytes (for non-flat extents)
|
||||
+#
|
||||
+# @compressed: Whether this extent contains compressed data
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'VmdkExtentInfo',
|
||||
+ 'data': {
|
||||
+ 'filename': 'str',
|
||||
+ 'format': 'str',
|
||||
+ 'virtual-size': 'int',
|
||||
+ '*cluster-size': 'int',
|
||||
+ '*compressed': 'bool'
|
||||
} }
|
||||
|
||||
##
|
||||
@@ -5754,3 +5780,13 @@
|
||||
'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
|
||||
'returns': 'SnapshotInfo',
|
||||
'allow-preconfig': true }
|
||||
+
|
||||
+##
|
||||
+# @DummyBlockCoreForceArrays:
|
||||
+#
|
||||
+# Not used by QMP; hack to let us use ImageInfoList internally
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'DummyBlockCoreForceArrays',
|
||||
+ 'data': { 'unused-image-info': ['ImageInfo'] } }
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,55 @@
|
||||
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit ef56ffbdd6b0605dc1e305611287b948c970e236
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:08 2023 -0400
|
||||
|
||||
checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
Advise authors to use the _guarded versions of the APIs, instead.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
scripts/checkpatch.pl | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
|
||||
index d768171dcf..eeaec436eb 100755
|
||||
--- a/scripts/checkpatch.pl
|
||||
+++ b/scripts/checkpatch.pl
|
||||
@@ -2865,6 +2865,14 @@ sub process {
|
||||
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
|
||||
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
|
||||
}
|
||||
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
|
||||
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
+# recommend aio_bh_new_guarded instead of aio_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
|
||||
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
# check for module_init(), use category-specific init macros explicitly please
|
||||
if ($line =~ /^module_init\s*\(/) {
|
||||
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,127 +0,0 @@
|
||||
From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:17 -0500
|
||||
Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
|
||||
race
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
|
||||
RH-Bugzilla: 2155748
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
|
||||
means the rest of dma_blk_cb() is not protected. In particular, the
|
||||
DMAAIOCB field accesses happen outside the lock.
|
||||
|
||||
There is a race when the main loop thread holds the AioContext lock and
|
||||
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
|
||||
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
|
||||
field determines how cancellation proceeds. If dma_aio_cancel() sees
|
||||
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
|
||||
completed twice (-ECANCELED and the actual return value).
|
||||
|
||||
The following assertion can occur with virtio-scsi when an IOThread is
|
||||
used:
|
||||
|
||||
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
|
||||
|
||||
Fix the race by holding the AioContext across dma_blk_cb(). Now
|
||||
dma_aio_cancel() under the AioContext lock will not see
|
||||
inconsistent/intermediate states.
|
||||
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 4 +---
|
||||
softmmu/dma-helpers.c | 12 +++++++-----
|
||||
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index 5327f93f4c..b12d8b0816 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -354,13 +354,12 @@ done:
|
||||
scsi_req_unref(&r->req);
|
||||
}
|
||||
|
||||
+/* Called with AioContext lock held */
|
||||
static void scsi_dma_complete(void *opaque, int ret)
|
||||
{
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
-
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
}
|
||||
scsi_dma_complete_noio(r, ret);
|
||||
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||
}
|
||||
|
||||
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
|
||||
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
|
||||
index 7820fec54c..2463964805 100644
|
||||
--- a/softmmu/dma-helpers.c
|
||||
+++ b/softmmu/dma-helpers.c
|
||||
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
|
||||
static void dma_blk_cb(void *opaque, int ret)
|
||||
{
|
||||
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
||||
+ AioContext *ctx = dbs->ctx;
|
||||
dma_addr_t cur_addr, cur_len;
|
||||
void *mem;
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
+ aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
if (dbs->iov.size == 0) {
|
||||
trace_dma_map_wait(dbs);
|
||||
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
||||
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
||||
}
|
||||
|
||||
- aio_context_acquire(dbs->ctx);
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
- aio_context_release(dbs->ctx);
|
||||
assert(dbs->acb);
|
||||
+out:
|
||||
+ aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 07/12] edu: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:16:13 2023 +0100
|
||||
|
||||
edu: add smp_mb__after_rmw()
|
||||
|
||||
Ensure ordering between clearing the COMPUTING flag and checking
|
||||
IRQFACT, and between setting the IRQFACT flag and checking
|
||||
COMPUTING. This ensures that no wakeups are lost.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
hw/misc/edu.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
|
||||
index e935c418d4..a1f8bc77e7 100644
|
||||
--- a/hw/misc/edu.c
|
||||
+++ b/hw/misc/edu.c
|
||||
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
|
||||
case 0x20:
|
||||
if (val & EDU_STATUS_IRQFACT) {
|
||||
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
|
||||
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
} else {
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
|
||||
}
|
||||
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
|
||||
qemu_mutex_unlock(&edu->thr_mutex);
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
|
||||
|
||||
+ /* Clear COMPUTING flag before checking IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
|
||||
qemu_mutex_lock_iothread();
|
||||
edu_raise_irq(edu, FACT_IRQ);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,153 @@
|
||||
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:32 +0200
|
||||
Subject: [PATCH 02/21] graph-lock: Disable locking for now
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
|
||||
come from callers that hold an AioContext lock, which is not allowed
|
||||
during polling. In theory, we could temporarily release the lock, but
|
||||
callers are inconsistent about whether they hold a lock, and if they do,
|
||||
some are also confused about which one they hold. While all of this is
|
||||
fixable, it's not trivial, and the best course of action for 8.0.1 is
|
||||
probably just disabling the graph locking code temporarily.
|
||||
|
||||
We don't currently rely on graph locking yet. It is supposed to replace
|
||||
the AioContext lock eventually to enable multiqueue support, but as long
|
||||
as we still have the AioContext lock, it is sufficient without the graph
|
||||
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
|
||||
more either and this commit can be reverted. (Of course, it can also be
|
||||
reverted while the AioContext lock still exists if the callers have been
|
||||
fixed.)
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 259a7a0bde..2490926c90 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
|
||||
/* Protects the list of aiocontext and orphaned_reader_count */
|
||||
static QemuMutex aio_context_list_lock;
|
||||
|
||||
+#if 0
|
||||
/* Written and read with atomic operations. */
|
||||
static int has_writer;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* A reader coroutine could move from an AioContext to another.
|
||||
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
|
||||
g_free(ctx->bdrv_graph);
|
||||
}
|
||||
|
||||
+#if 0
|
||||
static uint32_t reader_count(void)
|
||||
{
|
||||
BdrvGraphRWlock *brdv_graph;
|
||||
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
|
||||
assert((int32_t)rd >= 0);
|
||||
return rd;
|
||||
}
|
||||
+#endif
|
||||
|
||||
void bdrv_graph_wrlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+ /*
|
||||
+ * TODO Some callers hold an AioContext lock when this is called, which
|
||||
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
|
||||
+ * AioContext locks are gone).
|
||||
+ */
|
||||
+#if 0
|
||||
assert(!qatomic_read(&has_writer));
|
||||
|
||||
/* Make sure that constantly arriving new I/O doesn't cause starvation */
|
||||
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
|
||||
} while (reader_count() >= 1);
|
||||
|
||||
bdrv_drain_all_end();
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_wrunlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+#if 0
|
||||
QEMU_LOCK_GUARD(&aio_context_list_lock);
|
||||
assert(qatomic_read(&has_writer));
|
||||
|
||||
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
|
||||
|
||||
/* Wake up all coroutine that are waiting to read the graph */
|
||||
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
{
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
{
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
if (qatomic_read(&has_writer)) {
|
||||
aio_wait_kick();
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_rdlock_main_loop(void)
|
||||
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
/* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
{
|
||||
assert(qemu_in_main_thread());
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
assert(qatomic_read(&has_writer));
|
||||
+#endif
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,40 @@
|
||||
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Tue, 2 May 2023 15:51:53 +0530
|
||||
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
|
||||
version 7.6 and above
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
|
||||
|
||||
Please look at QEMU upstream commit
|
||||
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
|
||||
This patch adapts the above change so that it applies to RHEL pc machines of
|
||||
version 7.6 and newer. These are the machine types that are currently supported
|
||||
in RHEL. Q35 machines are not affected.
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 4d5880e249..6c7be628e1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
|
||||
pcmc->default_nic_model = "e1000";
|
||||
pcmc->pci_root_uid = 0;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
m->default_display = "std";
|
||||
m->no_parallel = 1;
|
||||
m->numa_mem_supported = true;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,101 @@
|
||||
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Wed, 29 Mar 2023 10:27:26 +0530
|
||||
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
|
||||
older than version 2.3
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
|
||||
|
||||
i440fx machine versions 2.3 and newer support dynamic ram
|
||||
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") .
|
||||
All currently supported q35 machine types (versions 2.4 and newer) support
|
||||
resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table
|
||||
size exceeds a pre-defined value does not apply to those machine versions.
|
||||
Add a check limiting the warning message to only those machines that do not
|
||||
support expandable ram blocks (that is, i440fx machines with version 2.2
|
||||
and older).
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
|
||||
---
|
||||
hw/i386/acpi-build.c | 6 ++++--
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 9 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
|
||||
index ec857a117e..9bc4d8a981 100644
|
||||
--- a/hw/i386/acpi-build.c
|
||||
+++ b/hw/i386/acpi-build.c
|
||||
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
int legacy_table_size =
|
||||
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
|
||||
ACPI_BUILD_ALIGN_SIZE);
|
||||
- if (tables_blob->len > legacy_table_size) {
|
||||
+ if ((tables_blob->len > legacy_table_size) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
g_array_set_size(tables_blob, legacy_table_size);
|
||||
} else {
|
||||
/* Make sure we have a buffer in case we need to resize the tables. */
|
||||
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
|
||||
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index f216922cee..7db5a2348f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
pcmc->pvh_enabled = true;
|
||||
pcmc->kvmclock_create_always = true;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
assert(!mc->get_hotplug_handler);
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index fc704d783f..4d5880e249 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
|
||||
pcmc->rsdp_in_ram = false;
|
||||
+ pcmc->resizable_acpi_blob = false;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index d218ad1628..2f514d13d8 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -130,6 +130,9 @@ struct PCMachineClass {
|
||||
|
||||
/* create kvmclock device even when KVM PV features are not exposed */
|
||||
bool kvmclock_create_always;
|
||||
+
|
||||
+ /* resizable acpi blob compat */
|
||||
+ bool resizable_acpi_blob;
|
||||
};
|
||||
|
||||
#define TYPE_PC_MACHINE "generic-pc-machine"
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,60 @@
|
||||
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
|
||||
There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'.
|
||||
Both of them are required to follow cluster-NUMA-node boundary. To
|
||||
enable the validation to warn about the irregular configuration where
|
||||
multiple CPUs in one cluster have been associated with different NUMA
|
||||
nodes.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/sbsa-ref.c | 2 ++
|
||||
hw/arm/virt.c | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
|
||||
index 0b93558dde..efb380e7c8 100644
|
||||
--- a/hw/arm/sbsa-ref.c
|
||||
+++ b/hw/arm/sbsa-ref.c
|
||||
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
|
||||
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
|
||||
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
}
|
||||
|
||||
static const TypeInfo sbsa_ref_info = {
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 9be53e9355..df6a0231bc 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,166 @@
|
||||
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Tue, 25 Jul 2023 10:56:51 +0100
|
||||
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
The implementation of the SMMUv3 has multiple places where it reads a
|
||||
data structure from the guest and directly operates on it without
|
||||
doing a guest-to-host endianness conversion. Since all SMMU data
|
||||
structures are little-endian, this means that the SMMU doesn't work
|
||||
on a big-endian host. In particular, this causes the Avocado test
|
||||
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
|
||||
to fail on an s390x host.
|
||||
|
||||
Add appropriate byte-swapping on reads and writes of guest in-memory
|
||||
data structures so that the device works correctly on big-endian
|
||||
hosts.
|
||||
|
||||
As part of this we constrain queue_read() to operate only on Cmd
|
||||
structs and queue_write() on Evt structs, because in practice these
|
||||
are the only data structures the two functions are used with, and we
|
||||
need to know what the data structure is to be able to byte-swap its
|
||||
parts correctly.
|
||||
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
|
||||
Cc: qemu-stable@nongnu.org
|
||||
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/smmu-common.c | 3 +--
|
||||
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
|
||||
2 files changed, 32 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
|
||||
index e7f1c1f219..daa02ce798 100644
|
||||
--- a/hw/arm/smmu-common.c
|
||||
+++ b/hw/arm/smmu-common.c
|
||||
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
|
||||
dma_addr_t addr = baseaddr + index * sizeof(*pte);
|
||||
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
if (ret != MEMTX_OK) {
|
||||
info->type = SMMU_PTW_ERR_WALK_EABT;
|
||||
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
|
||||
index 270c80b665..cfb56725a6 100644
|
||||
--- a/hw/arm/smmuv3.c
|
||||
+++ b/hw/arm/smmuv3.c
|
||||
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
|
||||
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
|
||||
}
|
||||
|
||||
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
|
||||
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
|
||||
{
|
||||
dma_addr_t addr = Q_CONS_ENTRY(q);
|
||||
+ MemTxResult ret;
|
||||
+ int i;
|
||||
|
||||
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
|
||||
+ MEMTXATTRS_UNSPECIFIED);
|
||||
+ if (ret != MEMTX_OK) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
|
||||
+ le32_to_cpus(&cmd->word[i]);
|
||||
+ }
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-static MemTxResult queue_write(SMMUQueue *q, void *data)
|
||||
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
|
||||
{
|
||||
dma_addr_t addr = Q_PROD_ENTRY(q);
|
||||
MemTxResult ret;
|
||||
+ Evt evt = *evt_in;
|
||||
+ int i;
|
||||
|
||||
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
|
||||
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
|
||||
+ cpu_to_le32s(&evt.word[i]);
|
||||
+ }
|
||||
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
|
||||
MEMTXATTRS_UNSPECIFIED);
|
||||
if (ret != MEMTX_OK) {
|
||||
return ret;
|
||||
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
|
||||
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
SMMUEventInfo *event)
|
||||
{
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_ste(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
|
||||
}
|
||||
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
CD *buf, SMMUEventInfo *event)
|
||||
{
|
||||
dma_addr_t addr = STE_CTXPTR(ste);
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_cd(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
return -EINVAL;
|
||||
}
|
||||
if (s->features & SMMU_FEATURE_2LVL_STE) {
|
||||
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
|
||||
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
|
||||
dma_addr_t l1ptr, l2ptr;
|
||||
STEDesc l1std;
|
||||
|
||||
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
event->u.f_ste_fetch.addr = l1ptr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
|
||||
+ le32_to_cpus(&l1std.word[i]);
|
||||
+ }
|
||||
|
||||
span = L1STD_SPAN(&l1std);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,169 +0,0 @@
|
||||
From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
After the improvement to high memory region address assignment is
|
||||
applied, the memory layout can be changed, introducing possible
|
||||
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
|
||||
is disabled or enabled when the optimization is applied or not, with
|
||||
the following configuration. The configuration is only achievable by
|
||||
modifying the source code until more properties are added to allow
|
||||
users selectively disable those high memory regions.
|
||||
|
||||
pa_bits = 40;
|
||||
vms->highmem_redists = false;
|
||||
vms->highmem_ecam = false;
|
||||
vms->highmem_mmio = true;
|
||||
|
||||
# qemu-system-aarch64 -accel kvm -cpu host \
|
||||
-machine virt-7.2,compact-highmem={on, off} \
|
||||
-m 4G,maxmem=511G -monitor stdio
|
||||
|
||||
Region compact-highmem=off compact-highmem=on
|
||||
----------------------------------------------------------------
|
||||
MEM [1GB 512GB] [1GB 512GB]
|
||||
HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled]
|
||||
HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled]
|
||||
HIGH_PCIE_MMIO [disabled] [512GB 1TB]
|
||||
|
||||
In order to keep backwards compatibility, we need to disable the
|
||||
optimization on machine, which is virt-7.1 or earlier than it. It
|
||||
means the optimization is enabled by default from virt-7.2. Besides,
|
||||
'compact-highmem' property is added so that the optimization can be
|
||||
explicitly enabled or disabled on all machine types by users.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-7-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Conflicts:
|
||||
hw/arm/virt.c
|
||||
Comment out the handlers of property 'compact-highmem' since
|
||||
the property isn't exposed.
|
||||
---
|
||||
docs/system/arm/virt.rst | 4 ++++
|
||||
hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++
|
||||
include/hw/arm/virt.h | 1 +
|
||||
3 files changed, 39 insertions(+)
|
||||
|
||||
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
|
||||
index 20442ea2c1..4454706392 100644
|
||||
--- a/docs/system/arm/virt.rst
|
||||
+++ b/docs/system/arm/virt.rst
|
||||
@@ -94,6 +94,10 @@ highmem
|
||||
address space above 32 bits. The default is ``on`` for machine types
|
||||
later than ``virt-2.12``.
|
||||
|
||||
+compact-highmem
|
||||
+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
|
||||
+ The default is ``on`` for machine types later than ``virt-7.2``.
|
||||
+
|
||||
gic-version
|
||||
Specify the version of the Generic Interrupt Controller (GIC) to provide.
|
||||
Valid values are:
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6896e0ca0f..6087511ae9 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = {
|
||||
* Note the extended_memmap is sized so that it eventually also includes the
|
||||
* base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
|
||||
* index of base_memmap).
|
||||
+ *
|
||||
+ * The memory map for these Highmem IO Regions can be in legacy or compact
|
||||
+ * layout, depending on 'compact-highmem' property. With legacy layout, the
|
||||
+ * PA space for one specific region is always reserved, even if the region
|
||||
+ * has been disabled or doesn't fit into the PA space. However, the PA space
|
||||
+ * for the region won't be reserved in these circumstances with compact layout.
|
||||
*/
|
||||
static MemMapEntry extended_memmap[] = {
|
||||
/* Additional 64 MB redist region (can contain up to 512 redistributors) */
|
||||
@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
|
||||
vms->highmem = value;
|
||||
}
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
+static bool virt_get_compact_highmem(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_compact;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_compact = value;
|
||||
+}
|
||||
+#endif /* disabled for RHEL */
|
||||
+
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
{
|
||||
VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable using "
|
||||
"physical address space above 32 bits");
|
||||
|
||||
+ object_class_property_add_bool(oc, "compact-highmem",
|
||||
+ virt_get_compact_highmem,
|
||||
+ virt_set_compact_highmem);
|
||||
+ object_class_property_set_description(oc, "compact-highmem",
|
||||
+ "Set on/off to enable/disable compact "
|
||||
+ "layout for high memory regions");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj)
|
||||
|
||||
/* High memory is enabled by default */
|
||||
vms->highmem = true;
|
||||
+ vms->highmem_compact = !vmc->no_highmem_compact;
|
||||
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
|
||||
|
||||
vms->highmem_ecam = !vmc->no_highmem_ecam;
|
||||
@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2)
|
||||
|
||||
static void virt_machine_7_1_options(MachineClass *mc)
|
||||
{
|
||||
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
|
||||
+
|
||||
virt_machine_7_2_options(mc);
|
||||
compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
|
||||
+ /* Compact layout for high memory regions was introduced with 7.2 */
|
||||
+ vmc->no_highmem_compact = true;
|
||||
}
|
||||
DEFINE_VIRT_MACHINE(7, 1)
|
||||
|
||||
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
|
||||
index 15bd291311..85e7d61868 100644
|
||||
--- a/include/hw/arm/virt.h
|
||||
+++ b/include/hw/arm/virt.h
|
||||
@@ -125,6 +125,7 @@ struct VirtMachineClass {
|
||||
bool no_pmu;
|
||||
bool claim_edge_triggered_timers;
|
||||
bool smbios_old_sys_ver;
|
||||
+ bool no_highmem_compact;
|
||||
bool no_highmem_ecam;
|
||||
bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */
|
||||
bool kvm_no_adjvtime;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,179 +0,0 @@
|
||||
From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory
|
||||
regions
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
The 3 high memory regions are usually enabled by default, but they may
|
||||
be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2.
|
||||
This leads to waste in the PA space.
|
||||
|
||||
Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to
|
||||
allow users selectively disable them if needed. After that, the high
|
||||
memory region for GICv3 or GICv4 redistributor can be disabled by user,
|
||||
the number of maximal supported CPUs needs to be calculated based on
|
||||
'vms->highmem_redists'. The follow-up error message is also improved
|
||||
to indicate if the high memory region for GICv3 and GICv4 has been
|
||||
enabled or not.
|
||||
|
||||
Suggested-by: Marc Zyngier <maz@kernel.org>
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20221029224307.138822-8-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Conflicts:
|
||||
hw/arm/virt.c
|
||||
Comment out the handlers of the property 'highmem-redists',
|
||||
'highmem-ecam' and 'highmem-mmio' since they aren't exposed.
|
||||
---
|
||||
docs/system/arm/virt.rst | 13 +++++++
|
||||
hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 86 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
|
||||
index 4454706392..188a4f211f 100644
|
||||
--- a/docs/system/arm/virt.rst
|
||||
+++ b/docs/system/arm/virt.rst
|
||||
@@ -98,6 +98,19 @@ compact-highmem
|
||||
Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
|
||||
The default is ``on`` for machine types later than ``virt-7.2``.
|
||||
|
||||
+highmem-redists
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or
|
||||
+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will
|
||||
+ limit the maximum number of CPUs when GICv3 or GICv4 is used.
|
||||
+
|
||||
+highmem-ecam
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM.
|
||||
+ The default is ``on`` for machine types later than ``virt-3.0``.
|
||||
+
|
||||
+highmem-mmio
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
|
||||
+ The default is ``on``.
|
||||
+
|
||||
gic-version
|
||||
Specify the version of the Generic Interrupt Controller (GIC) to provide.
|
||||
Valid values are:
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6087511ae9..304fa0d6e7 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine)
|
||||
if (vms->gic_version == VIRT_GIC_VERSION_2) {
|
||||
virt_max_cpus = GIC_NCPU;
|
||||
} else {
|
||||
- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) +
|
||||
- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
|
||||
+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
|
||||
+ if (vms->highmem_redists) {
|
||||
+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
|
||||
+ }
|
||||
}
|
||||
|
||||
if (max_cpus > virt_max_cpus) {
|
||||
error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
|
||||
"supported by machine 'mach-virt' (%d)",
|
||||
max_cpus, virt_max_cpus);
|
||||
+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
|
||||
+ error_printf("Try 'highmem-redists=on' for more CPUs\n");
|
||||
+ }
|
||||
+
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_compact = value;
|
||||
}
|
||||
+
|
||||
+static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_redists;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_redists = value;
|
||||
+}
|
||||
+
|
||||
+static bool virt_get_highmem_ecam(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_ecam;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_ecam = value;
|
||||
+}
|
||||
+
|
||||
+static bool virt_get_highmem_mmio(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_mmio;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_mmio = value;
|
||||
+}
|
||||
+
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable compact "
|
||||
"layout for high memory regions");
|
||||
|
||||
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||
+ virt_get_highmem_redists,
|
||||
+ virt_set_highmem_redists);
|
||||
+ object_class_property_set_description(oc, "highmem-redists",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for GICv3 or GICv4 "
|
||||
+ "redistributor");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||
+ virt_get_highmem_ecam,
|
||||
+ virt_set_highmem_ecam);
|
||||
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI ECAM");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||
+ virt_get_highmem_mmio,
|
||||
+ virt_set_highmem_mmio);
|
||||
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI MMIO");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,51 +0,0 @@
|
||||
From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address
|
||||
assignment for 9.2.0 machine
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
Upstream: RHEL only
|
||||
|
||||
The compact high memory region address assignment is enabled for 9.2.0,
|
||||
but it's kept as disabled for 9.0.0, to keep the backwards compatibility
|
||||
on 9.0.0. Note that these newly added properties ('compact-highmem',
|
||||
'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream
|
||||
aren't exposed for the downstream.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 304fa0d6e7..e41c0b462c 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj)
|
||||
|
||||
/* High memory is enabled by default */
|
||||
vms->highmem = true;
|
||||
+ vms->highmem_compact = !vmc->no_highmem_compact;
|
||||
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
|
||||
|
||||
vms->highmem_ecam = !vmc->no_highmem_ecam;
|
||||
@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc)
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
|
||||
+ vmc->no_highmem_compact = true;
|
||||
}
|
||||
DEFINE_RHEL_MACHINE(9, 0, 0)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,112 +0,0 @@
|
||||
From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address
|
||||
assignment
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
There are three high memory regions, which are VIRT_HIGH_REDIST2,
|
||||
VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
|
||||
are floating on highest RAM address. However, they can be disabled
|
||||
in several cases.
|
||||
|
||||
(1) One specific high memory region is likely to be disabled by
|
||||
code by toggling vms->highmem_{redists, ecam, mmio}.
|
||||
|
||||
(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
|
||||
'virt-2.12' or earlier than it.
|
||||
|
||||
(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
|
||||
on a 32-bit system.
|
||||
|
||||
(4) One specific high memory region is disabled when it breaks the
|
||||
PA space limit.
|
||||
|
||||
The current implementation of virt_set_{memmap, high_memmap}() isn't
|
||||
optimized because the high memory region's PA space is always reserved,
|
||||
regardless of whatever the actual state in the corresponding
|
||||
vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and
|
||||
'vms->highest_gpa' are always increased for case (1), (2) and (3).
|
||||
It's unnecessary since the assigned PA space for the disabled high
|
||||
memory region won't be used afterwards.
|
||||
|
||||
Improve the address assignment for those three high memory regions by
|
||||
skipping the address assignment for one specific high memory region if
|
||||
it has been disabled in case (1), (2) and (3). The memory layout may
|
||||
be changed after the improvement is applied, which leads to potential
|
||||
migration breakage. So 'vms->highmem_compact' is added to control if
|
||||
the improvement should be applied. For now, 'vms->highmem_compact' is
|
||||
set to false, meaning that we don't have memory layout change until it
|
||||
becomes configurable through property 'compact-highmem' in next patch.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-6-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 15 ++++++++++-----
|
||||
include/hw/arm/virt.h | 1 +
|
||||
2 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6e3b9fc060..6896e0ca0f 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
- * Check each device to see if they fit in the PA space,
|
||||
- * moving highest_gpa as we go.
|
||||
+ * Check each device to see if it fits in the PA space,
|
||||
+ * moving highest_gpa as we go. For compatibility, move
|
||||
+ * highest_gpa for disabled fitting devices as well, if
|
||||
+ * the compact layout has been disabled.
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
fits = (region_base + region_size) <= BIT_ULL(pa_bits);
|
||||
- if (fits) {
|
||||
- vms->highest_gpa = region_base + region_size - 1;
|
||||
+ *region_enabled &= fits;
|
||||
+ if (vms->highmem_compact && !*region_enabled) {
|
||||
+ continue;
|
||||
}
|
||||
|
||||
- *region_enabled &= fits;
|
||||
base = region_base + region_size;
|
||||
+ if (fits) {
|
||||
+ vms->highest_gpa = base - 1;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
|
||||
index 22b54ec510..15bd291311 100644
|
||||
--- a/include/hw/arm/virt.h
|
||||
+++ b/include/hw/arm/virt.h
|
||||
@@ -144,6 +144,7 @@ struct VirtMachineState {
|
||||
PFlashCFI01 *flash[2];
|
||||
bool secure;
|
||||
bool highmem;
|
||||
+ bool highmem_compact;
|
||||
bool highmem_ecam;
|
||||
bool highmem_mmio;
|
||||
bool highmem_redists;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,82 +0,0 @@
|
||||
From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in
|
||||
virt_set_high_memmap()
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces variable 'region_base' for the base address of the
|
||||
specific high memory region. It's the preparatory work to optimize
|
||||
high memory region address assignment.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-4-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index ca098d40b8..ddcf7ee2f8 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
- hwaddr region_size;
|
||||
+ hwaddr region_base, region_size;
|
||||
bool fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ region_base = ROUND_UP(base, extended_memmap[i].size);
|
||||
region_size = extended_memmap[i].size;
|
||||
|
||||
- base = ROUND_UP(base, region_size);
|
||||
- vms->memmap[i].base = base;
|
||||
+ vms->memmap[i].base = region_base;
|
||||
vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
- fits = (base + region_size) <= BIT_ULL(pa_bits);
|
||||
+ fits = (region_base + region_size) <= BIT_ULL(pa_bits);
|
||||
if (fits) {
|
||||
- vms->highest_gpa = base + region_size - 1;
|
||||
+ vms->highest_gpa = region_base + region_size - 1;
|
||||
}
|
||||
|
||||
switch (i) {
|
||||
@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
break;
|
||||
}
|
||||
|
||||
- base += region_size;
|
||||
+ base = region_base + region_size;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,95 +0,0 @@
|
||||
From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled()
|
||||
helper
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces virt_get_high_memmap_enabled() helper, which returns
|
||||
the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will
|
||||
be used in the subsequent patches.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-5-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 32 +++++++++++++++++++-------------
|
||||
1 file changed, 19 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index ddcf7ee2f8..6e3b9fc060 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
return arm_cpu_mp_affinity(idx, clustersz);
|
||||
}
|
||||
|
||||
+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
|
||||
+ int index)
|
||||
+{
|
||||
+ bool *enabled_array[] = {
|
||||
+ &vms->highmem_redists,
|
||||
+ &vms->highmem_ecam,
|
||||
+ &vms->highmem_mmio,
|
||||
+ };
|
||||
+
|
||||
+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST ==
|
||||
+ ARRAY_SIZE(enabled_array));
|
||||
+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array));
|
||||
+
|
||||
+ return enabled_array[index - VIRT_LOWMEMMAP_LAST];
|
||||
+}
|
||||
+
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
hwaddr region_base, region_size;
|
||||
- bool fits;
|
||||
+ bool *region_enabled, fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ region_enabled = virt_get_high_memmap_enabled(vms, i);
|
||||
region_base = ROUND_UP(base, extended_memmap[i].size);
|
||||
region_size = extended_memmap[i].size;
|
||||
|
||||
@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
vms->highest_gpa = region_base + region_size - 1;
|
||||
}
|
||||
|
||||
- switch (i) {
|
||||
- case VIRT_HIGH_GIC_REDIST2:
|
||||
- vms->highmem_redists &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_ECAM:
|
||||
- vms->highmem_ecam &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_MMIO:
|
||||
- vms->highmem_mmio &= fits;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
+ *region_enabled &= fits;
|
||||
base = region_base + region_size;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,130 +0,0 @@
|
||||
From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces virt_set_high_memmap() helper. The logic of high
|
||||
memory region address assignment is moved to the helper. The intention
|
||||
is to make the subsequent optimization for high memory region address
|
||||
assignment easier.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-2-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 74 ++++++++++++++++++++++++++++-----------------------
|
||||
1 file changed, 41 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index bf18838b87..bea5f54720 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
return arm_cpu_mp_affinity(idx, clustersz);
|
||||
}
|
||||
|
||||
+static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
+ hwaddr base, int pa_bits)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ hwaddr size = extended_memmap[i].size;
|
||||
+ bool fits;
|
||||
+
|
||||
+ base = ROUND_UP(base, size);
|
||||
+ vms->memmap[i].base = base;
|
||||
+ vms->memmap[i].size = size;
|
||||
+
|
||||
+ /*
|
||||
+ * Check each device to see if they fit in the PA space,
|
||||
+ * moving highest_gpa as we go.
|
||||
+ *
|
||||
+ * For each device that doesn't fit, disable it.
|
||||
+ */
|
||||
+ fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
+ if (fits) {
|
||||
+ vms->highest_gpa = base + size - 1;
|
||||
+ }
|
||||
+
|
||||
+ switch (i) {
|
||||
+ case VIRT_HIGH_GIC_REDIST2:
|
||||
+ vms->highmem_redists &= fits;
|
||||
+ break;
|
||||
+ case VIRT_HIGH_PCIE_ECAM:
|
||||
+ vms->highmem_ecam &= fits;
|
||||
+ break;
|
||||
+ case VIRT_HIGH_PCIE_MMIO:
|
||||
+ vms->highmem_mmio &= fits;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ base += size;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
|
||||
{
|
||||
MachineState *ms = MACHINE(vms);
|
||||
@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
|
||||
/* We know for sure that at least the memory fits in the PA space */
|
||||
vms->highest_gpa = memtop - 1;
|
||||
|
||||
- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
- hwaddr size = extended_memmap[i].size;
|
||||
- bool fits;
|
||||
-
|
||||
- base = ROUND_UP(base, size);
|
||||
- vms->memmap[i].base = base;
|
||||
- vms->memmap[i].size = size;
|
||||
-
|
||||
- /*
|
||||
- * Check each device to see if they fit in the PA space,
|
||||
- * moving highest_gpa as we go.
|
||||
- *
|
||||
- * For each device that doesn't fit, disable it.
|
||||
- */
|
||||
- fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
- if (fits) {
|
||||
- vms->highest_gpa = base + size - 1;
|
||||
- }
|
||||
-
|
||||
- switch (i) {
|
||||
- case VIRT_HIGH_GIC_REDIST2:
|
||||
- vms->highmem_redists &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_ECAM:
|
||||
- vms->highmem_ecam &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_MMIO:
|
||||
- vms->highmem_mmio &= fits;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- base += size;
|
||||
- }
|
||||
+ virt_set_high_memmap(vms, base, pa_bits);
|
||||
|
||||
if (device_memory_size > 0) {
|
||||
ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,83 +0,0 @@
|
||||
From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in
|
||||
virt_set_high_memmap()
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This renames variable 'size' to 'region_size' in virt_set_high_memmap().
|
||||
Its counterpart ('region_base') will be introduced in next patch.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-3-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 15 ++++++++-------
|
||||
1 file changed, 8 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index bea5f54720..ca098d40b8 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
+ hwaddr region_size;
|
||||
+ bool fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
- hwaddr size = extended_memmap[i].size;
|
||||
- bool fits;
|
||||
+ region_size = extended_memmap[i].size;
|
||||
|
||||
- base = ROUND_UP(base, size);
|
||||
+ base = ROUND_UP(base, region_size);
|
||||
vms->memmap[i].base = base;
|
||||
- vms->memmap[i].size = size;
|
||||
+ vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
* Check each device to see if they fit in the PA space,
|
||||
@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
- fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
+ fits = (base + region_size) <= BIT_ULL(pa_bits);
|
||||
if (fits) {
|
||||
- vms->highest_gpa = base + size - 1;
|
||||
+ vms->highest_gpa = base + region_size - 1;
|
||||
}
|
||||
|
||||
switch (i) {
|
||||
@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
break;
|
||||
}
|
||||
|
||||
- base += size;
|
||||
+ base += region_size;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,41 @@
|
||||
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
|
||||
RHEL machines
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
Upstream Status: RHEL only
|
||||
|
||||
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
|
||||
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
|
||||
A warning message will be printed if the boundary is broken.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index df6a0231bc..faf68488d5 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,44 @@
|
||||
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 15:34:45 -0300
|
||||
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
|
||||
<= pc-q35-rhel9.2.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
|
||||
RH-Bugzilla: 2223691
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that sets off the property
|
||||
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
|
||||
live migrations to RHEL9.2 to happen successfully.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
|
||||
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5ea52317b9..6f5117669d 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,118 @@
|
||||
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 2 May 2023 21:27:02 -0300
|
||||
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
|
||||
RH-Bugzilla: 2189423
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
Since its implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
|
||||
set for machine types < 8.0 will cause migration to fail if the target
|
||||
QEMU version is < 8.0.0 :
|
||||
|
||||
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
|
||||
qemu-system-x86_64: Failed to load PCIDevice:config
|
||||
qemu-system-x86_64: Failed to load e1000e:parent_obj
|
||||
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
|
||||
qemu-system-x86_64: load of migration failed: Invalid argument
|
||||
|
||||
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
|
||||
with this cmdline:
|
||||
|
||||
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
|
||||
|
||||
In order to fix this, property x-pcie-err-unc-mask was introduced to
|
||||
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
|
||||
default, but is disabled if machine type <= 7.2.
|
||||
|
||||
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
|
||||
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
|
||||
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 1 +
|
||||
hw/pci/pci.c | 2 ++
|
||||
hw/pci/pcie_aer.c | 11 +++++++----
|
||||
include/hw/pci/pci.h | 2 ++
|
||||
4 files changed, 12 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 0e0120b7f2..c28702b690 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
|
||||
{ "e1000e", "migrate-timadj", "off" },
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
|
||||
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
|
||||
index def5000e7b..8ad4349e96 100644
|
||||
--- a/hw/pci/pci.c
|
||||
+++ b/hw/pci/pci.c
|
||||
@@ -79,6 +79,8 @@ static Property pci_props[] = {
|
||||
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
|
||||
failover_pair_id),
|
||||
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
|
||||
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
|
||||
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
|
||||
DEFINE_PROP_END_OF_LIST()
|
||||
};
|
||||
|
||||
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
|
||||
index 103667c368..374d593ead 100644
|
||||
--- a/hw/pci/pcie_aer.c
|
||||
+++ b/hw/pci/pcie_aer.c
|
||||
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
|
||||
|
||||
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
|
||||
PCI_ERR_UNC_SUPPORTED);
|
||||
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_MASK_DEFAULT);
|
||||
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_SUPPORTED);
|
||||
+
|
||||
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
|
||||
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_MASK_DEFAULT);
|
||||
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_SUPPORTED);
|
||||
+ }
|
||||
|
||||
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
|
||||
PCI_ERR_UNC_SEVERITY_DEFAULT);
|
||||
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
|
||||
index d5a40cd058..6dc6742fc4 100644
|
||||
--- a/include/hw/pci/pci.h
|
||||
+++ b/include/hw/pci/pci.h
|
||||
@@ -207,6 +207,8 @@ enum {
|
||||
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
|
||||
#define QEMU_PCIE_CXL_BITNR 10
|
||||
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
|
||||
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
|
||||
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
|
||||
};
|
||||
|
||||
typedef struct PCIINTxRoute {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,470 @@
|
||||
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
|
||||
qemu_bh_new_guarded
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:09 2023 -0400
|
||||
|
||||
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
|
||||
|
||||
This protects devices from bh->mmio reentrancy issues.
|
||||
|
||||
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Paul Durrant <paul@xen.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/9pfs/xen-9p-backend.c | 5 ++++-
|
||||
hw/block/dataplane/virtio-blk.c | 3 ++-
|
||||
hw/block/dataplane/xen-block.c | 5 +++--
|
||||
hw/char/virtio-serial-bus.c | 3 ++-
|
||||
hw/display/qxl.c | 9 ++++++---
|
||||
hw/display/virtio-gpu.c | 6 ++++--
|
||||
hw/ide/ahci.c | 3 ++-
|
||||
hw/ide/ahci_internal.h | 1 +
|
||||
hw/ide/core.c | 4 +++-
|
||||
hw/misc/imx_rngc.c | 6 ++++--
|
||||
hw/misc/macio/mac_dbdma.c | 2 +-
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/nvme/ctrl.c | 6 ++++--
|
||||
hw/scsi/mptsas.c | 3 ++-
|
||||
hw/scsi/scsi-bus.c | 3 ++-
|
||||
hw/scsi/vmw_pvscsi.c | 3 ++-
|
||||
hw/usb/dev-uas.c | 3 ++-
|
||||
hw/usb/hcd-dwc2.c | 3 ++-
|
||||
hw/usb/hcd-ehci.c | 3 ++-
|
||||
hw/usb/hcd-uhci.c | 2 +-
|
||||
hw/usb/host-libusb.c | 6 ++++--
|
||||
hw/usb/redirect.c | 6 ++++--
|
||||
hw/usb/xen-usb.c | 3 ++-
|
||||
hw/virtio/virtio-balloon.c | 5 +++--
|
||||
hw/virtio/virtio-crypto.c | 3 ++-
|
||||
25 files changed, 66 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
|
||||
index 74f3a05f88..0e266c552b 100644
|
||||
--- a/hw/9pfs/xen-9p-backend.c
|
||||
+++ b/hw/9pfs/xen-9p-backend.c
|
||||
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
|
||||
|
||||
int num_rings;
|
||||
Xen9pfsRing *rings;
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
} Xen9pfsDev;
|
||||
|
||||
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
|
||||
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
|
||||
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
|
||||
XEN_FLEX_RING_SIZE(ring_order);
|
||||
|
||||
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
|
||||
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
|
||||
+ &xen_9pdev->rings[i],
|
||||
+ &xen_9pdev->mem_reentrancy_guard);
|
||||
xen_9pdev->rings[i].out_cons = 0;
|
||||
xen_9pdev->rings[i].out_size = 0;
|
||||
xen_9pdev->rings[i].inprogress = false;
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index b28d81737e..a6202997ee 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
||||
} else {
|
||||
s->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
||||
|
||||
*dataplane = s;
|
||||
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
|
||||
index 734da42ea7..d8bc39d359 100644
|
||||
--- a/hw/block/dataplane/xen-block.c
|
||||
+++ b/hw/block/dataplane/xen-block.c
|
||||
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
|
||||
} else {
|
||||
dataplane->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
|
||||
- dataplane);
|
||||
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
|
||||
+ dataplane,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
|
||||
return dataplane;
|
||||
}
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index 7d4601cb5d..dd619f0731 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
|
||||
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
|
||||
index 80ce1e9a93..f1c0eb7dfc 100644
|
||||
--- a/hw/display/qxl.c
|
||||
+++ b/hw/display/qxl.c
|
||||
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
|
||||
|
||||
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
|
||||
|
||||
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
|
||||
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
qxl_reset_state(qxl);
|
||||
|
||||
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
|
||||
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
|
||||
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index 5e15c79b94..66ac9b6cc5 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
|
||||
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
|
||||
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
QTAILQ_INIT(&g->cmdq);
|
||||
QTAILQ_INIT(&g->fenceq);
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index 55902e1df7..4e76d6b191 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
|
||||
ahci_write_fis_d2h(ad);
|
||||
|
||||
if (ad->port_regs.cmd_issue && !ad->check_bh) {
|
||||
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
|
||||
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
|
||||
+ &ad->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(ad->check_bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
|
||||
index 303fcd7235..2480455372 100644
|
||||
--- a/hw/ide/ahci_internal.h
|
||||
+++ b/hw/ide/ahci_internal.h
|
||||
@@ -321,6 +321,7 @@ struct AHCIDevice {
|
||||
bool init_d2h_sent;
|
||||
AHCICmdHdr *cur_cmd;
|
||||
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct AHCIPCIState {
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 45d14a25e9..de48ff9f86 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
|
||||
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
|
||||
{
|
||||
IDEState *s = opaque;
|
||||
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
|
||||
TrimAIOCB *iocb;
|
||||
|
||||
/* Paired with a decrement in ide_trim_bh_cb() */
|
||||
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
|
||||
|
||||
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
|
||||
iocb->s = s;
|
||||
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
|
||||
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
iocb->ret = 0;
|
||||
iocb->qiov = qiov;
|
||||
iocb->i = -1;
|
||||
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
|
||||
index 632c03779c..082c6980ad 100644
|
||||
--- a/hw/misc/imx_rngc.c
|
||||
+++ b/hw/misc/imx_rngc.c
|
||||
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_mmio(sbd, &s->iomem);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
|
||||
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
|
||||
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void imx_rngc_reset(DeviceState *dev)
|
||||
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
|
||||
index 43bb1f56ba..80a789f32b 100644
|
||||
--- a/hw/misc/macio/mac_dbdma.c
|
||||
+++ b/hw/misc/macio/mac_dbdma.c
|
||||
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
DBDMAState *s = MAC_DBDMA(dev);
|
||||
|
||||
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 53e1c32643..447f669921 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
|
||||
virtio_net_handle_tx_bh);
|
||||
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
|
||||
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
n->vqs[index].tx_waiting = 0;
|
||||
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
|
||||
index ac24eeb5ed..e5a468975e 100644
|
||||
--- a/hw/nvme/ctrl.c
|
||||
+++ b/hw/nvme/ctrl.c
|
||||
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
|
||||
}
|
||||
|
||||
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
|
||||
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
|
||||
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
|
||||
|
||||
if (n->dbbuf_enabled) {
|
||||
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
|
||||
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
}
|
||||
}
|
||||
n->cq[cqid] = cq;
|
||||
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
|
||||
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
|
||||
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
|
||||
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
|
||||
index c485da792c..3de288b454 100644
|
||||
--- a/hw/scsi/mptsas.c
|
||||
+++ b/hw/scsi/mptsas.c
|
||||
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
}
|
||||
s->max_devices = MPTSAS_NUM_PORTS;
|
||||
|
||||
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
|
||||
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||
index c97176110c..3c20b47ad0 100644
|
||||
--- a/hw/scsi/scsi-bus.c
|
||||
+++ b/hw/scsi/scsi-bus.c
|
||||
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||
AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||
/* The reference is dropped in scsi_dma_restart_bh.*/
|
||||
object_ref(OBJECT(s));
|
||||
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(s->bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
|
||||
index fa76696855..4de34536e9 100644
|
||||
--- a/hw/scsi/vmw_pvscsi.c
|
||||
+++ b/hw/scsi/vmw_pvscsi.c
|
||||
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
|
||||
}
|
||||
|
||||
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
|
||||
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
|
||||
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
|
||||
/* override default SCSI bus hotplug-handler, with pvscsi's one */
|
||||
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
|
||||
index 88f99c05d5..f013ded91e 100644
|
||||
--- a/hw/usb/dev-uas.c
|
||||
+++ b/hw/usb/dev-uas.c
|
||||
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
QTAILQ_INIT(&uas->results);
|
||||
QTAILQ_INIT(&uas->requests);
|
||||
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
|
||||
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
|
||||
+ &d->mem_reentrancy_guard);
|
||||
|
||||
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
|
||||
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
|
||||
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
|
||||
index 8755e9cbb0..a0c4e782b2 100644
|
||||
--- a/hw/usb/hcd-dwc2.c
|
||||
+++ b/hw/usb/hcd-dwc2.c
|
||||
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
|
||||
s->fi = USB_FRMINTVL - 1;
|
||||
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
}
|
||||
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
|
||||
index d4da8dcb8d..c930c60921 100644
|
||||
--- a/hw/usb/hcd-ehci.c
|
||||
+++ b/hw/usb/hcd-ehci.c
|
||||
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
|
||||
}
|
||||
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
s->device = dev;
|
||||
|
||||
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
|
||||
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
|
||||
index 8ac1175ad2..77baaa7a6b 100644
|
||||
--- a/hw/usb/hcd-uhci.c
|
||||
+++ b/hw/usb/hcd-uhci.c
|
||||
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
|
||||
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
|
||||
}
|
||||
}
|
||||
- s->bh = qemu_bh_new(uhci_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
|
||||
s->num_ports_vmstate = NB_PORTS;
|
||||
QTAILQ_INIT(&s->queues);
|
||||
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
|
||||
index 176868d345..f500db85ab 100644
|
||||
--- a/hw/usb/host-libusb.c
|
||||
+++ b/hw/usb/host-libusb.c
|
||||
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
|
||||
static void usb_host_nodev(USBHostDevice *s)
|
||||
{
|
||||
if (!s->bh_nodev) {
|
||||
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
|
||||
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(s->bh_nodev);
|
||||
}
|
||||
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
|
||||
USBHostDevice *dev = opaque;
|
||||
|
||||
if (!dev->bh_postld) {
|
||||
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
|
||||
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(dev->bh_postld);
|
||||
dev->bh_postld_pending = true;
|
||||
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
|
||||
index fd7df599bc..39fbaaab16 100644
|
||||
--- a/hw/usb/redirect.c
|
||||
+++ b/hw/usb/redirect.c
|
||||
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
|
||||
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
|
||||
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
|
||||
|
||||
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
|
||||
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
|
||||
index 66cb3f7c24..38ee660a30 100644
|
||||
--- a/hw/usb/xen-usb.c
|
||||
+++ b/hw/usb/xen-usb.c
|
||||
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
|
||||
|
||||
QTAILQ_INIT(&usbif->req_free_q);
|
||||
QSIMPLEQ_INIT(&usbif->hotplug_q);
|
||||
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
|
||||
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static int usbback_free(struct XenLegacyDevice *xendev)
|
||||
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
|
||||
index 43092aa634..5186e831dd 100644
|
||||
--- a/hw/virtio/virtio-balloon.c
|
||||
+++ b/hw/virtio/virtio-balloon.c
|
||||
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
|
||||
precopy_add_notifier(&s->free_page_hint_notify);
|
||||
|
||||
object_ref(OBJECT(s->iothread));
|
||||
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
|
||||
- virtio_ballloon_get_free_page_hints, s);
|
||||
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
|
||||
+ virtio_ballloon_get_free_page_hints, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 802e1b9659..2fe804510f 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
|
||||
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,141 @@
|
||||
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Mon, 29 May 2023 14:21:08 -0400
|
||||
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
|
||||
controller (CVE-2023-0330)
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit b987718bbb1d0eabf95499b976212dd5f0120d75
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon May 22 11:10:11 2023 +0200
|
||||
|
||||
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
|
||||
|
||||
We cannot use the generic reentrancy guard in the LSI code, so
|
||||
we have to manually prevent endless reentrancy here. The problematic
|
||||
lsi_execute_script() function already has a way to detect whether
|
||||
too many instructions have been executed - we just have to slightly
|
||||
change the logic here that it also takes into account if the function
|
||||
has been called too often in a reentrant way.
|
||||
|
||||
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
|
||||
patch by Mauro Matteo Cascella.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
|
||||
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 50 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 048436352b..f7d45b0b20 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
+ static int reentrancy_level;
|
||||
+
|
||||
+ reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- if (++insn_processed > LSI_MAX_INSN) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
+ /*
|
||||
+ * Some windows drivers make the device spin waiting for a memory location
|
||||
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
+ * assume this is the case and force an unexpected device disconnect. This
|
||||
+ * is apparently sufficient to beat the drivers into submission.
|
||||
+ *
|
||||
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
+ * trigger itself again and again. Avoid this problem by stopping after
|
||||
+ * being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
+ * which should be enough for all valid use cases).
|
||||
+ */
|
||||
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1596,6 +1605,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
+
|
||||
+ reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
index 2012bd54b7..1b55928b9f 100644
|
||||
--- a/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
@@ -8,6 +8,36 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "libqtest.h"
|
||||
|
||||
+/*
|
||||
+ * This used to trigger a DMA reentrancy issue
|
||||
+ * leading to memory corruption bugs like stack
|
||||
+ * overflow or use-after-free
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
+ */
|
||||
+static void test_lsi_dma_reentrancy(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 512M -nodefaults "
|
||||
+ "-blockdev driver=null-co,node-name=null0 "
|
||||
+ "-device lsi53c810 -device scsi-cd,drive=null0");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
|
||||
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
|
||||
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
|
||||
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
|
||||
+ qtest_writel(s, 0xff000000, 0xc0000024);
|
||||
+ qtest_writel(s, 0xff000114, 0x00000080);
|
||||
+ qtest_writel(s, 0xff00012c, 0xff000000);
|
||||
+ qtest_writel(s, 0xff000004, 0xff000114);
|
||||
+ qtest_writel(s, 0xff000008, 0xff100014);
|
||||
+ qtest_writel(s, 0xff10002f, 0x000000ff);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This used to trigger a UAF in lsi_do_msgout()
|
||||
* https://gitlab.com/qemu-project/qemu/-/issues/972
|
||||
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
|
||||
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
|
||||
test_lsi_do_msgout_cancel_req);
|
||||
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
|
||||
+ test_lsi_dma_reentrancy);
|
||||
+
|
||||
return g_test_run();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,59 +0,0 @@
|
||||
From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001
|
||||
From: Julia Suvorova <jusual@redhat.com>
|
||||
Date: Thu, 23 Feb 2023 13:57:47 +0100
|
||||
Subject: [PATCH] hw/smbios: fix field corruption in type 4 table
|
||||
|
||||
RH-Author: Julia Suvorova <None>
|
||||
RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table
|
||||
RH-Bugzilla: 2169904
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec
|
||||
|
||||
Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the
|
||||
strings which follow immediately after the struct fields have been
|
||||
overwritten by unconditional filling of later fields such as core_count2.
|
||||
Make these fields dependent on the SMBIOS version.
|
||||
|
||||
Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4")
|
||||
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904
|
||||
|
||||
Signed-off-by: Julia Suvorova <jusual@redhat.com>
|
||||
Message-Id: <20230223125747.254914-1-jusual@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Ani Sinha <ani@anisinha.ca>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b)
|
||||
---
|
||||
hw/smbios/smbios.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index c5ad69237e..2d2ece3edb 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
|
||||
t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
|
||||
t->core_enabled = t->core_count;
|
||||
|
||||
- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
-
|
||||
t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
|
||||
- t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
|
||||
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
|
||||
t->processor_family2 = cpu_to_le16(0x01); /* Other */
|
||||
|
||||
+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) {
|
||||
+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
+ t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
+ }
|
||||
+
|
||||
SMBIOS_BUILD_TABLE_POST;
|
||||
smbios_type4_count++;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,76 @@
|
||||
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit 0ddcb39c9357
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Fri Jun 30 16:36:08 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
|
||||
Coverity reports a tainted scalar when traversing the capabilities
|
||||
chain (CID 1516589). In practice I've never seen a device with a
|
||||
chain so broken as to cause an issue, but it's also pretty easy to
|
||||
sanitize.
|
||||
|
||||
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index 0ed2fcd531..f4ff836805 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
.set = set_nv_gpudirect_clique_id,
|
||||
};
|
||||
|
||||
+static bool is_valid_std_cap_offset(uint8_t pos)
|
||||
+{
|
||||
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
|
||||
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
|
||||
+}
|
||||
+
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
*/
|
||||
ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
- if (ret != 1 || !tmp) {
|
||||
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
|
||||
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
d4_conflict = true;
|
||||
}
|
||||
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
- } while (tmp);
|
||||
+ } while (is_valid_std_cap_offset(tmp));
|
||||
|
||||
if (!c8_conflict) {
|
||||
pos = 0xC8;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,110 @@
|
||||
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
|
||||
GPUDirect Cliques
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit f6b30c1984f7
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Thu Jun 8 12:05:07 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
|
||||
|
||||
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
|
||||
previously reserved for use by hypervisors to implement the GPUDirect
|
||||
Cliques capability. A revised specification provides an alternate
|
||||
location. Add a config space walk to the quirk to check for conflicts,
|
||||
allowing us to fall back to the new location or generate an error at the
|
||||
quirk setup rather than when the real conflicting capability is added
|
||||
should there be no available location.
|
||||
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index f0147a050a..0ed2fcd531 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
|
||||
* +---------------------------------+---------------------------------+
|
||||
*
|
||||
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
|
||||
+ *
|
||||
+ * Specification for Turning and later GPU architectures:
|
||||
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
|
||||
*/
|
||||
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
- int ret, pos = 0xC8;
|
||||
+ int ret, pos;
|
||||
+ bool c8_conflict = false, d4_conflict = false;
|
||||
+ uint8_t tmp;
|
||||
|
||||
if (vdev->nv_gpudirect_clique == 0xFF) {
|
||||
return 0;
|
||||
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Per the updated specification above, it's recommended to use offset
|
||||
+ * D4h for Turing and later GPU architectures due to a conflict of the
|
||||
+ * MSI-X capability at C8h. We don't know how to determine the GPU
|
||||
+ * architecture, instead we walk the capability chain to mark conflicts
|
||||
+ * and choose one or error based on the result.
|
||||
+ *
|
||||
+ * NB. Cap list head in pdev->config is already cleared, read from device.
|
||||
+ */
|
||||
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
+ vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
+ if (ret != 1 || !tmp) {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ if (tmp == 0xC8) {
|
||||
+ c8_conflict = true;
|
||||
+ } else if (tmp == 0xD4) {
|
||||
+ d4_conflict = true;
|
||||
+ }
|
||||
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
+ } while (tmp);
|
||||
+
|
||||
+ if (!c8_conflict) {
|
||||
+ pos = 0xC8;
|
||||
+ } else if (!d4_conflict) {
|
||||
+ pos = 0xD4;
|
||||
+ } else {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,62 @@
|
||||
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 17 Jul 2023 18:21:26 +0200
|
||||
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
|
||||
virtio_iommu_handle_command()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
In the virtio_iommu_handle_command() when a PROBE request is handled,
|
||||
output_size takes a value greater than the tail size and on a subsequent
|
||||
iteration we can get a stack out-of-bounds access. Initialize the
|
||||
output_size on each iteration.
|
||||
|
||||
The issue was found with ASAN. Credits to:
|
||||
Yiming Tao(Zhejiang University)
|
||||
Gaoning Pan(Zhejiang University)
|
||||
|
||||
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
|
||||
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio-iommu.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
|
||||
index 421e2a944f..17ce630200 100644
|
||||
--- a/hw/virtio/virtio-iommu.c
|
||||
+++ b/hw/virtio/virtio-iommu.c
|
||||
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
||||
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
|
||||
struct virtio_iommu_req_head head;
|
||||
struct virtio_iommu_req_tail tail = {};
|
||||
- size_t output_size = sizeof(tail), sz;
|
||||
VirtQueueElement *elem;
|
||||
unsigned int iov_cnt;
|
||||
struct iovec *iov;
|
||||
void *buf = NULL;
|
||||
+ size_t sz;
|
||||
|
||||
for (;;) {
|
||||
+ size_t output_size = sizeof(tail);
|
||||
+
|
||||
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
|
||||
if (!elem) {
|
||||
return;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,52 @@
|
||||
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:29:15 -0400
|
||||
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
|
||||
0x8000001F is set
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:30 2022 -0500
|
||||
|
||||
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
|
||||
|
||||
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
|
||||
associated with fields being set.
|
||||
|
||||
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 839706b430..4ac3046313 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
- *ebx = sev_get_cbit_position();
|
||||
- *ebx |= sev_get_reduced_phys_bits() << 6;
|
||||
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,77 @@
|
||||
From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:22:55 -0400
|
||||
Subject: [PATCH 12/14] i386/sev: Update checks and information related to
|
||||
reduced-phys-bits
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:29 2022 -0500
|
||||
|
||||
i386/sev: Update checks and information related to reduced-phys-bits
|
||||
|
||||
The value of the reduced-phys-bits parameter is propagated to the CPUID
|
||||
information exposed to the guest. Update the current validation check to
|
||||
account for the size of the CPUID field (6-bits), ensuring the value is
|
||||
in the range of 1 to 63.
|
||||
|
||||
Maintain backward compatibility, to an extent, by allowing a value greater
|
||||
than 1 (so that the previously documented value of 5 still works), but not
|
||||
allowing anything over 63.
|
||||
|
||||
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 17 ++++++++++++++---
|
||||
1 file changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 859e06f6ad..fe2144c038 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
host_cbitpos = ebx & 0x3f;
|
||||
|
||||
+ /*
|
||||
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
|
||||
+ * register of CPUID 0x8000001F. No need to verify the range as the
|
||||
+ * comparison against the host value accomplishes that.
|
||||
+ */
|
||||
if (host_cbitpos != sev->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev->cbitpos);
|
||||
goto err;
|
||||
}
|
||||
|
||||
- if (sev->reduced_phys_bits < 1) {
|
||||
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
|
||||
- " requested '%d'", __func__, sev->reduced_phys_bits);
|
||||
+ /*
|
||||
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
|
||||
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
|
||||
+ * is in the range of 1 to 63.
|
||||
+ */
|
||||
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
|
||||
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
+ " it should be in the range of 1 to 63, requested '%d'",
|
||||
+ __func__, sev->reduced_phys_bits);
|
||||
goto err;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,64 +0,0 @@
|
||||
From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Thu, 23 Feb 2023 14:59:21 +0800
|
||||
Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode
|
||||
RH-Bugzilla: 2156876
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos)
|
||||
|
||||
Without dt mode, device IOTLB notifier won't work since guest won't
|
||||
send device IOTLB invalidation descriptor in this case. Let's fail
|
||||
early instead of misbehaving silently.
|
||||
|
||||
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
|
||||
Tested-by: Laurent Vivier <lvivier@redhat.com>
|
||||
Tested-by: Viktor Prutyanov <viktor@daynix.com>
|
||||
Buglink: https://bugzilla.redhat.com/2156876
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Message-Id: <20230223065924.42503-3-jasowang@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3)
|
||||
|
||||
Conflict in hw/i386/intel_iommu.c because of missing commit:
|
||||
|
||||
4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode")
|
||||
---
|
||||
hw/i386/intel_iommu.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
||||
index a08ee85edf..d2983f40d3 100644
|
||||
--- a/hw/i386/intel_iommu.c
|
||||
+++ b/hw/i386/intel_iommu.c
|
||||
@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
|
||||
{
|
||||
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
|
||||
IntelIOMMUState *s = vtd_as->iommu_state;
|
||||
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
|
||||
|
||||
/* TODO: add support for VFIO and vhost users */
|
||||
if (s->snoop_control) {
|
||||
@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
|
||||
"Snoop Control with vhost or VFIO is not supported");
|
||||
return -ENOTSUP;
|
||||
}
|
||||
+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
|
||||
+ error_setg_errno(errp, ENOTSUP,
|
||||
+ "device %02x.%02x.%x requires device IOTLB mode",
|
||||
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
|
||||
+ PCI_FUNC(vtd_as->devfn));
|
||||
+ return -ENOTSUP;
|
||||
+ }
|
||||
|
||||
/* Update per-address-space notifier flags */
|
||||
vtd_as->notifier_flags = new;
|
||||
--
|
||||
2.39.1
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue