parent
13c765192a
commit
a3945c4644
@ -1 +1 @@
|
||||
SOURCES/qemu-7.0.0.tar.xz
|
||||
SOURCES/qemu-7.2.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
c3fd2403106c33d0470bc9ba4fb4b946c0402248 SOURCES/qemu-7.0.0.tar.xz
|
||||
634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz
|
||||
|
@ -1,45 +0,0 @@
|
||||
From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Jones <drjones@redhat.com>
|
||||
Date: Tue, 21 Jan 2014 10:46:52 +0100
|
||||
Subject: globally limit the maximum number of CPUs
|
||||
|
||||
We now globally limit the number of VCPUs.
|
||||
Especially, there is no way one can specify more than
|
||||
max_cpus VCPUs for a VM.
|
||||
|
||||
This allows us to restore the ppc max_cpus limitation to the upstream
|
||||
default and minimize the ppc hack in kvm-all.c.
|
||||
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Signed-off-by: Danilo Cesar Lemes de Paula <ddepaula@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 5f1377ca04..fdf0e4d429 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms)
|
||||
soft_vcpus_limit = kvm_recommended_vcpus(s);
|
||||
hard_vcpus_limit = kvm_max_vcpus(s);
|
||||
|
||||
+#ifdef HOST_PPC64
|
||||
+ /*
|
||||
+ * On POWER, the kernel advertises a soft limit based on the
|
||||
+ * number of CPU threads on the host. We want to allow exceeding
|
||||
+ * this for testing purposes, so we don't want to set hard limit
|
||||
+ * to soft limit as on x86.
|
||||
+ */
|
||||
+#else
|
||||
+ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */
|
||||
+ hard_vcpus_limit = soft_vcpus_limit;
|
||||
+#endif
|
||||
+
|
||||
while (nc->name) {
|
||||
if (nc->num > soft_vcpus_limit) {
|
||||
warn_report("Number of %s cpus requested (%d) exceeds "
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,66 +0,0 @@
|
||||
From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001
|
||||
From: Fam Zheng <famz@redhat.com>
|
||||
Date: Wed, 14 Jun 2017 15:37:01 +0200
|
||||
Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]
|
||||
|
||||
RH-Author: Fam Zheng <famz@redhat.com>
|
||||
Message-id: <20170614153701.14757-1-famz@redhat.com>
|
||||
Patchwork-id: 75613
|
||||
O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]
|
||||
Bugzilla: 1378816
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
||||
|
||||
We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't
|
||||
ready. Even if it were, the changes would be too invasive. To get an idea:
|
||||
|
||||
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html
|
||||
|
||||
is an incomplete attempt to fix part of the issue, and the remaining
|
||||
work unfortunately involves even more complex changes.
|
||||
|
||||
As a band-aid, this partially reverts the effect of ef8875b
|
||||
(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot
|
||||
simply revert that commit as a whole because we already shipped it in
|
||||
qemu-kvm-rhev 7.3, and block jobs have been possible since then. We should
|
||||
only block what has been broken. Also, faithfully reverting the above
|
||||
commit means adding back the removed op blocker, but that is not enough,
|
||||
because it still crashes when inserting media into an initially empty
|
||||
scsi-cd.
|
||||
|
||||
All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable
|
||||
unless the scsi-cd never enters an empty state, so, disable it
|
||||
altogether. Otherwise it would be much more difficult to avoid
|
||||
crashing.
|
||||
|
||||
Signed-off-by: Fam Zheng <famz@redhat.com>
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
||||
---
|
||||
hw/scsi/virtio-scsi.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
|
||||
index 34a968ecfb..7f6da33a8a 100644
|
||||
--- a/hw/scsi/virtio-scsi.c
|
||||
+++ b/hw/scsi/virtio-scsi.c
|
||||
@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
AioContext *old_context;
|
||||
int ret;
|
||||
|
||||
+ /* XXX: Remove this check once block backend is capable of handling
|
||||
+ * AioContext change upon eject/insert.
|
||||
+ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if
|
||||
+ * data plane is not used, both cases are safe for scsi-cd. */
|
||||
+ if (s->ctx && s->ctx != qemu_get_aio_context() &&
|
||||
+ object_dynamic_cast(OBJECT(dev), "scsi-cd")) {
|
||||
+ error_setg(errp, "scsi-cd is not supported by data plane");
|
||||
+ return;
|
||||
+ }
|
||||
if (s->ctx && !s->dataplane_fenced) {
|
||||
if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
|
||||
return;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,26 @@
|
||||
From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 9 Nov 2022 07:08:32 -0500
|
||||
Subject: Add 7.2 compat bits for RHEL 9.1 machine type
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 9edec1ca05..3d851d34da 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = {
|
||||
{ "arm-gicv3-common", "force-8-bit-prio", "on" },
|
||||
/* hw_compat_rhel_9_1 from hw_compat_7_0 */
|
||||
{ "nvme-ns", "eui64-default", "on"},
|
||||
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
|
||||
+ { "virtio-device", "queue_reset", "false" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,135 +0,0 @@
|
||||
From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 9 Feb 2022 04:16:25 -0500
|
||||
Subject: WRB: Introduce RHEL 9.0.0 hw compat structure
|
||||
|
||||
General compatibility structure for post RHEL 9.0.0 rebase.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 9 +++++++++
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
hw/i386/pc_piix.c | 4 ++++
|
||||
hw/i386/pc_q35.c | 4 ++++
|
||||
hw/s390x/s390-virtio-ccw.c | 2 ++
|
||||
include/hw/boards.h | 3 +++
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
7 files changed, 31 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 28989b6e7b..dffc3ef4ab 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = {
|
||||
};
|
||||
const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
|
||||
|
||||
+/*
|
||||
+ * Mostly the same as hw_compat_6_2
|
||||
+ */
|
||||
+GlobalProperty hw_compat_rhel_9_0[] = {
|
||||
+ /* hw_compat_rhel_9_0 from hw_compat_6_2 */
|
||||
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0);
|
||||
+
|
||||
/*
|
||||
* Mostly the same as hw_compat_6_0 and hw_compat_6_1
|
||||
*/
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 263d882af6..0886cfe3fe 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = {
|
||||
};
|
||||
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
|
||||
+GlobalProperty pc_rhel_9_0_compat[] = {
|
||||
+ /* pc_rhel_9_0_compat from pc_compat_6_2 */
|
||||
+ { "virtio-mem", "unplugged-inaccessible", "off" },
|
||||
+};
|
||||
+const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat);
|
||||
+
|
||||
GlobalProperty pc_rhel_8_5_compat[] = {
|
||||
/* pc_rhel_8_5_compat from pc_compat_6_0 */
|
||||
{ "qemu64" "-" TYPE_X86_CPU, "family", "6" },
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 0cacc0d623..dc987fe93b 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
pcmc->kvmclock_create_always = false;
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
+ hw_compat_rhel_9_0_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_0_compat,
|
||||
+ pc_rhel_9_0_compat_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_8_6,
|
||||
hw_compat_rhel_8_6_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_8_5,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 157160e069..52c253c570 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.0.0";
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
+ hw_compat_rhel_9_0_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_0_compat,
|
||||
+ pc_rhel_9_0_compat_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 465a2a09d2..08e0f6a79b 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
|
||||
DEFINE_CCW_MACHINE(2_4, "2.4", false);
|
||||
#endif
|
||||
|
||||
+
|
||||
static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
{
|
||||
}
|
||||
|
||||
static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index d1555665df..635e45dd71 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_0[];
|
||||
+extern const size_t hw_compat_rhel_9_0_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_8_6[];
|
||||
extern const size_t hw_compat_rhel_8_6_len;
|
||||
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 419a6ec24b..a492c420b5 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len;
|
||||
extern GlobalProperty pc_rhel_compat[];
|
||||
extern const size_t pc_rhel_compat_len;
|
||||
|
||||
+extern GlobalProperty pc_rhel_9_0_compat[];
|
||||
+extern const size_t pc_rhel_9_0_compat_len;
|
||||
+
|
||||
extern GlobalProperty pc_rhel_8_5_compat[];
|
||||
extern const size_t pc_rhel_8_5_compat_len;
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,47 @@
|
||||
From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 16:47:16 +0100
|
||||
Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585
|
||||
Upstream Status: n/a (rhel-only)
|
||||
|
||||
Add the compatibility handling for the rebase from QEMU 7.1 to 7.2,
|
||||
i.e. the settings from ccw_machine_7_1_class_options() and
|
||||
ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type
|
||||
(earlier settings have been added by previous rebases already).
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index ba640e3d9e..97e868ada0 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
|
||||
|
||||
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
|
||||
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
|
||||
}
|
||||
|
||||
static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
{
|
||||
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
|
||||
+ static GlobalProperty compat[] = {
|
||||
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
|
||||
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
|
||||
+ };
|
||||
+
|
||||
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
+ s390mc->max_threads = S390_MAX_CPUS;
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,38 +0,0 @@
|
||||
From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 4 Apr 2022 12:15:50 +0200
|
||||
Subject: redhat: Update s390x machine type compatibility for rebase to QEMU
|
||||
7.0.0
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x]
|
||||
RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd
|
||||
RH-Bugzilla: 2064782
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
|
||||
No s390x-specific machine class property updates required this time,
|
||||
only an update to the default qemu cpu model.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 08e0f6a79b..4a491d4988 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
|
||||
|
||||
static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
{
|
||||
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
|
||||
+
|
||||
+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
|
||||
}
|
||||
|
||||
static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,43 @@
|
||||
From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001
|
||||
From: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Wed, 23 Nov 2022 14:15:37 +0100
|
||||
Subject: redhat: aarch64: add rhel9.2.0 virt machine type
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982
|
||||
Upstream: RHEL only
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index dfcab40a73..0a94f31dd1 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void)
|
||||
}
|
||||
type_init(rhel_machine_init);
|
||||
|
||||
+static void rhel920_virt_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
+
|
||||
static void rhel900_virt_options(MachineClass *mc)
|
||||
{
|
||||
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
|
||||
|
||||
+ rhel920_virt_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
}
|
||||
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
|
||||
+DEFINE_RHEL_MACHINE(9, 0, 0)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Thu, 14 Apr 2022 14:58:43 +0100
|
||||
Subject: pc: Move s3/s4 suspend disabling to compat
|
||||
|
||||
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-MergeRequest: 155: 7.0 machine type fixes (x86)
|
||||
RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699
|
||||
RH-Bugzilla: 2064771
|
||||
|
||||
Our downstream patches currently have tweaks in the C code to disable
|
||||
s3/s4; Thomas pointed out we can just set the property.
|
||||
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
hw/acpi/ich9.c | 4 ++--
|
||||
hw/acpi/piix4.c | 4 ++--
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
3 files changed, 10 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
|
||||
index de1e401cdf..bd9bbade70 100644
|
||||
--- a/hw/acpi/ich9.c
|
||||
+++ b/hw/acpi/ich9.c
|
||||
@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
|
||||
static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
|
||||
pm->acpi_memory_hotplug.is_enabled = true;
|
||||
pm->cpu_hotplug_legacy = true;
|
||||
- pm->disable_s3 = 1;
|
||||
- pm->disable_s4 = 1;
|
||||
+ pm->disable_s3 = 0;
|
||||
+ pm->disable_s4 = 0;
|
||||
pm->s4_val = 2;
|
||||
pm->use_acpi_hotplug_bridge = true;
|
||||
pm->keep_pci_slot_hpc = true;
|
||||
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
|
||||
index 28544e78c3..2fb2b43248 100644
|
||||
--- a/hw/acpi/piix4.c
|
||||
+++ b/hw/acpi/piix4.c
|
||||
@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
|
||||
|
||||
static Property piix4_pm_properties[] = {
|
||||
DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
|
||||
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1),
|
||||
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1),
|
||||
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0),
|
||||
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0),
|
||||
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
|
||||
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState,
|
||||
use_acpi_hotplug_bridge, true),
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 0886cfe3fe..f98f842f80 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
|
||||
* machine type.
|
||||
*/
|
||||
GlobalProperty pc_rhel_compat[] = {
|
||||
+ /* we don't support s3/s4 suspend */
|
||||
+ { "PIIX4_PM", "disable_s3", "1" },
|
||||
+ { "PIIX4_PM", "disable_s4", "1" },
|
||||
+ { "ICH9-LPC", "disable_s3", "1" },
|
||||
+ { "ICH9-LPC", "disable_s4", "1" },
|
||||
+
|
||||
{ TYPE_X86_CPU, "host-phys-bits", "on" },
|
||||
{ TYPE_X86_CPU, "host-phys-bits-limit", "48" },
|
||||
{ TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" },
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,62 @@
|
||||
From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 17:03:24 +0100
|
||||
Subject: redhat: Add new rhel-9.2.0 s390x machine type
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473
|
||||
Upstream Status: n/a (rhel-only)
|
||||
|
||||
RHEL 9.2 will be an EUS release - we want to have a new machine
|
||||
type here to make sure that we have a spot where we can wire up
|
||||
fixes later.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++-
|
||||
1 file changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 97e868ada0..aa142a1a4e 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
|
||||
#endif
|
||||
|
||||
|
||||
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
+
|
||||
static void ccw_machine_rhel900_instance_options(MachineState *machine)
|
||||
{
|
||||
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
|
||||
|
||||
+ ccw_machine_rhel920_instance_options(machine);
|
||||
+
|
||||
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
|
||||
s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
|
||||
}
|
||||
@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc)
|
||||
{ TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
|
||||
};
|
||||
|
||||
+ ccw_machine_rhel920_class_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
s390mc->max_threads = S390_MAX_CPUS;
|
||||
}
|
||||
-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
|
||||
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
|
||||
|
||||
static void ccw_machine_rhel860_instance_options(MachineState *machine)
|
||||
{
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,75 @@
|
||||
From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Thu, 17 Nov 2022 12:36:30 +0000
|
||||
Subject: x86: rhel 9.2.0 machine type
|
||||
|
||||
Add a 9.2.0 x86 machine type, and fix up the compatibility
|
||||
for 9.0.0 and older.
|
||||
|
||||
pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's
|
||||
nothing to do there.
|
||||
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
hw/i386/pc_q35.c | 21 ++++++++++++++++++++-
|
||||
2 files changed, 21 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 173a1fd10b..fc06877344 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
+ pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 97c3630021..52cfe3bf45 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
|
||||
}
|
||||
|
||||
+static void pc_q35_init_rhel920(MachineState *machine)
|
||||
+{
|
||||
+ pc_q35_init(machine);
|
||||
+}
|
||||
+
|
||||
+static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
+{
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
+ pc_q35_machine_rhel_options(m);
|
||||
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
+ pcmc->smbios_stream_product = "RHEL";
|
||||
+ pcmc->smbios_stream_version = "9.2.0";
|
||||
+}
|
||||
+
|
||||
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
+ pc_q35_machine_rhel920_options);
|
||||
+
|
||||
static void pc_q35_init_rhel900(MachineState *machine)
|
||||
{
|
||||
pc_q35_init(machine);
|
||||
@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine)
|
||||
static void pc_q35_machine_rhel900_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
- pc_q35_machine_rhel_options(m);
|
||||
+ pc_q35_machine_rhel920_options(m);
|
||||
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
|
||||
+ m->alias = NULL;
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.0.0";
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
+ pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,87 +0,0 @@
|
||||
From ac346634c5731407baa9de709dbd4d5cc6f45301 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Mon, 11 Jul 2022 18:11:12 -0300
|
||||
Subject: [PATCH 02/11] Add dirty-sync-missed-zero-copy migration stat
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 111: zero-copy-send fixes & improvements
|
||||
RH-Commit: [2/6] 115035fd0a4e4b9439c91fb0f5d1a2f9244ba369 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2107466
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Acked-by: Markus Armbruster <armbru@redhat.com>
|
||||
Acked-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-Id: <20220711211112.18951-3-leobras@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
migration/migration.c | 2 ++
|
||||
monitor/hmp-cmds.c | 5 +++++
|
||||
qapi/migration.json | 7 ++++++-
|
||||
3 files changed, 13 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 8fb3eae910..3a3a7a4a50 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -1017,6 +1017,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
|
||||
info->ram->normal_bytes = ram_counters.normal * page_size;
|
||||
info->ram->mbps = s->mbps;
|
||||
info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
|
||||
+ info->ram->dirty_sync_missed_zero_copy =
|
||||
+ ram_counters.dirty_sync_missed_zero_copy;
|
||||
info->ram->postcopy_requests = ram_counters.postcopy_requests;
|
||||
info->ram->page_size = page_size;
|
||||
info->ram->multifd_bytes = ram_counters.multifd_bytes;
|
||||
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
|
||||
index 634968498b..9cec01de38 100644
|
||||
--- a/monitor/hmp-cmds.c
|
||||
+++ b/monitor/hmp-cmds.c
|
||||
@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
|
||||
monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n",
|
||||
info->ram->postcopy_bytes >> 10);
|
||||
}
|
||||
+ if (info->ram->dirty_sync_missed_zero_copy) {
|
||||
+ monitor_printf(mon,
|
||||
+ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n",
|
||||
+ info->ram->dirty_sync_missed_zero_copy);
|
||||
+ }
|
||||
}
|
||||
|
||||
if (info->has_disk) {
|
||||
diff --git a/qapi/migration.json b/qapi/migration.json
|
||||
index 5105790cd0..9b38b3c21c 100644
|
||||
--- a/qapi/migration.json
|
||||
+++ b/qapi/migration.json
|
||||
@@ -55,6 +55,10 @@
|
||||
# @postcopy-bytes: The number of bytes sent during the post-copy phase
|
||||
# (since 7.0).
|
||||
#
|
||||
+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could
|
||||
+# not avoid copying dirty pages. This is between
|
||||
+# 0 and @dirty-sync-count * @multifd-channels.
|
||||
+# (since 7.1)
|
||||
# Since: 0.14
|
||||
##
|
||||
{ 'struct': 'MigrationStats',
|
||||
@@ -65,7 +69,8 @@
|
||||
'postcopy-requests' : 'int', 'page-size' : 'int',
|
||||
'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64',
|
||||
'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64',
|
||||
- 'postcopy-bytes' : 'uint64' } }
|
||||
+ 'postcopy-bytes' : 'uint64',
|
||||
+ 'dirty-sync-missed-zero-copy' : 'uint64' } }
|
||||
|
||||
##
|
||||
# @XBZRLECacheStats:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Fri, 6 May 2022 17:03:11 +0200
|
||||
Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64
|
||||
RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm)
|
||||
RH-Bugzilla: 1477099
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798
|
||||
Upstream Status: RHEL-only
|
||||
Tested: With virtio-net-pci and virtio-block-pci
|
||||
|
||||
Let's enable the virtio-iommu-pci device on aarch64 by
|
||||
turning CONFIG_VIRTIO_IOMMU on.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
index 187938573f..1618d31b89 100644
|
||||
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y
|
||||
CONFIG_VIRTIO_MMIO=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
CONFIG_VIRTIO_MEM=y
|
||||
+CONFIG_VIRTIO_IOMMU=y
|
||||
CONFIG_XIO3130=y
|
||||
CONFIG_NVDIMM=y
|
||||
CONFIG_ACPI_APEI=y
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,41 +0,0 @@
|
||||
From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 9 Jun 2022 11:35:18 +0200
|
||||
Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64
|
||||
RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2094252
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185
|
||||
Upstream Status: RHEL-only
|
||||
Tested: With virtio-net-pci and virtio-block-pci
|
||||
|
||||
Let's enable the virtio-iommu-pci device on x86_64 by
|
||||
turning CONFIG_VIRTIO_IOMMU on.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
index d0c9e66641..3850b9de72 100644
|
||||
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y
|
||||
CONFIG_VIRTIO_MEM=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
CONFIG_VIRTIO_VGA=y
|
||||
+CONFIG_VIRTIO_IOMMU=y
|
||||
CONFIG_VMMOUSE=y
|
||||
CONFIG_VMPORT=y
|
||||
CONFIG_VTD=y
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,503 +0,0 @@
|
||||
From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001
|
||||
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
|
||||
Date: Mon, 25 Apr 2022 09:57:21 +0200
|
||||
Subject: [PATCH 01/16] Introduce event-loop-base abstract class
|
||||
|
||||
RH-Author: Nicolas Saenz Julienne <nsaenzju@redhat.com>
|
||||
RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size
|
||||
RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990
|
||||
RH-Bugzilla: 2031024
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
Introduce the 'event-loop-base' abstract class, it'll hold the
|
||||
properties common to all event loops and provide the necessary hooks for
|
||||
their creation and maintenance. Then have iothread inherit from it.
|
||||
|
||||
EventLoopBaseClass is defined as user creatable and provides a hook for
|
||||
its children to attach themselves to the user creatable class 'complete'
|
||||
function. It also provides an update_params() callback to propagate
|
||||
property changes onto its children.
|
||||
|
||||
The new 'event-loop-base' class will live in the root directory. It is
|
||||
built on its own using the 'link_whole' option (there are no direct
|
||||
function dependencies between the class and its children, it all happens
|
||||
through 'constructor' magic). And also imposes new compilation
|
||||
dependencies:
|
||||
|
||||
qom <- event-loop-base <- blockdev (iothread.c)
|
||||
|
||||
And in subsequent patches:
|
||||
|
||||
qom <- event-loop-base <- qemuutil (util/main-loop.c)
|
||||
|
||||
All this forced some amount of reordering in meson.build:
|
||||
|
||||
- Moved qom build definition before qemuutil. Doing it the other way
|
||||
around (i.e. moving qemuutil after qom) isn't possible as a lot of
|
||||
core libraries that live in between the two depend on it.
|
||||
|
||||
- Process the 'hw' subdir earlier, as it introduces files into the
|
||||
'qom' source set.
|
||||
|
||||
No functional changes intended.
|
||||
|
||||
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Acked-by: Markus Armbruster <armbru@redhat.com>
|
||||
Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d)
|
||||
---
|
||||
event-loop-base.c | 104 +++++++++++++++++++++++++++++++
|
||||
include/sysemu/event-loop-base.h | 36 +++++++++++
|
||||
include/sysemu/iothread.h | 6 +-
|
||||
iothread.c | 65 ++++++-------------
|
||||
meson.build | 23 ++++---
|
||||
qapi/qom.json | 22 +++++--
|
||||
6 files changed, 192 insertions(+), 64 deletions(-)
|
||||
create mode 100644 event-loop-base.c
|
||||
create mode 100644 include/sysemu/event-loop-base.h
|
||||
|
||||
diff --git a/event-loop-base.c b/event-loop-base.c
|
||||
new file mode 100644
|
||||
index 0000000000..a924c73a7c
|
||||
--- /dev/null
|
||||
+++ b/event-loop-base.c
|
||||
@@ -0,0 +1,104 @@
|
||||
+/*
|
||||
+ * QEMU event-loop base
|
||||
+ *
|
||||
+ * Copyright (C) 2022 Red Hat Inc
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Stefan Hajnoczi <stefanha@redhat.com>
|
||||
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "qom/object_interfaces.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "sysemu/event-loop-base.h"
|
||||
+
|
||||
+typedef struct {
|
||||
+ const char *name;
|
||||
+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
|
||||
+} EventLoopBaseParamInfo;
|
||||
+
|
||||
+static EventLoopBaseParamInfo aio_max_batch_info = {
|
||||
+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
|
||||
+};
|
||||
+
|
||||
+static void event_loop_base_get_param(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque, Error **errp)
|
||||
+{
|
||||
+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
|
||||
+ EventLoopBaseParamInfo *info = opaque;
|
||||
+ int64_t *field = (void *)event_loop_base + info->offset;
|
||||
+
|
||||
+ visit_type_int64(v, name, field, errp);
|
||||
+}
|
||||
+
|
||||
+static void event_loop_base_set_param(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque, Error **errp)
|
||||
+{
|
||||
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
|
||||
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
|
||||
+ EventLoopBaseParamInfo *info = opaque;
|
||||
+ int64_t *field = (void *)base + info->offset;
|
||||
+ int64_t value;
|
||||
+
|
||||
+ if (!visit_type_int64(v, name, &value, errp)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (value < 0) {
|
||||
+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
|
||||
+ info->name, INT64_MAX);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *field = value;
|
||||
+
|
||||
+ if (bc->update_params) {
|
||||
+ bc->update_params(base, errp);
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+static void event_loop_base_complete(UserCreatable *uc, Error **errp)
|
||||
+{
|
||||
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
|
||||
+ EventLoopBase *base = EVENT_LOOP_BASE(uc);
|
||||
+
|
||||
+ if (bc->init) {
|
||||
+ bc->init(base, errp);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
|
||||
+{
|
||||
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
|
||||
+ ucc->complete = event_loop_base_complete;
|
||||
+
|
||||
+ object_class_property_add(klass, "aio-max-batch", "int",
|
||||
+ event_loop_base_get_param,
|
||||
+ event_loop_base_set_param,
|
||||
+ NULL, &aio_max_batch_info);
|
||||
+}
|
||||
+
|
||||
+static const TypeInfo event_loop_base_info = {
|
||||
+ .name = TYPE_EVENT_LOOP_BASE,
|
||||
+ .parent = TYPE_OBJECT,
|
||||
+ .instance_size = sizeof(EventLoopBase),
|
||||
+ .class_size = sizeof(EventLoopBaseClass),
|
||||
+ .class_init = event_loop_base_class_init,
|
||||
+ .abstract = true,
|
||||
+ .interfaces = (InterfaceInfo[]) {
|
||||
+ { TYPE_USER_CREATABLE },
|
||||
+ { }
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static void register_types(void)
|
||||
+{
|
||||
+ type_register_static(&event_loop_base_info);
|
||||
+}
|
||||
+type_init(register_types);
|
||||
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
|
||||
new file mode 100644
|
||||
index 0000000000..8e77d8b69f
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/event-loop-base.h
|
||||
@@ -0,0 +1,36 @@
|
||||
+/*
|
||||
+ * QEMU event-loop backend
|
||||
+ *
|
||||
+ * Copyright (C) 2022 Red Hat Inc
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+#ifndef QEMU_EVENT_LOOP_BASE_H
|
||||
+#define QEMU_EVENT_LOOP_BASE_H
|
||||
+
|
||||
+#include "qom/object.h"
|
||||
+#include "block/aio.h"
|
||||
+#include "qemu/typedefs.h"
|
||||
+
|
||||
+#define TYPE_EVENT_LOOP_BASE "event-loop-base"
|
||||
+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
|
||||
+ EVENT_LOOP_BASE)
|
||||
+
|
||||
+struct EventLoopBaseClass {
|
||||
+ ObjectClass parent_class;
|
||||
+
|
||||
+ void (*init)(EventLoopBase *base, Error **errp);
|
||||
+ void (*update_params)(EventLoopBase *base, Error **errp);
|
||||
+};
|
||||
+
|
||||
+struct EventLoopBase {
|
||||
+ Object parent;
|
||||
+
|
||||
+ /* AioContext AIO engine parameters */
|
||||
+ int64_t aio_max_batch;
|
||||
+};
|
||||
+#endif
|
||||
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
|
||||
index 7f714bd136..8f8601d6ab 100644
|
||||
--- a/include/sysemu/iothread.h
|
||||
+++ b/include/sysemu/iothread.h
|
||||
@@ -17,11 +17,12 @@
|
||||
#include "block/aio.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "qom/object.h"
|
||||
+#include "sysemu/event-loop-base.h"
|
||||
|
||||
#define TYPE_IOTHREAD "iothread"
|
||||
|
||||
struct IOThread {
|
||||
- Object parent_obj;
|
||||
+ EventLoopBase parent_obj;
|
||||
|
||||
QemuThread thread;
|
||||
AioContext *ctx;
|
||||
@@ -37,9 +38,6 @@ struct IOThread {
|
||||
int64_t poll_max_ns;
|
||||
int64_t poll_grow;
|
||||
int64_t poll_shrink;
|
||||
-
|
||||
- /* AioContext AIO engine parameters */
|
||||
- int64_t aio_max_batch;
|
||||
};
|
||||
typedef struct IOThread IOThread;
|
||||
|
||||
diff --git a/iothread.c b/iothread.c
|
||||
index 0f98af0f2a..8fa2f3bfb8 100644
|
||||
--- a/iothread.c
|
||||
+++ b/iothread.c
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "qemu/module.h"
|
||||
#include "block/aio.h"
|
||||
#include "block/block.h"
|
||||
+#include "sysemu/event-loop-base.h"
|
||||
#include "sysemu/iothread.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qapi-commands-misc.h"
|
||||
@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread)
|
||||
iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
|
||||
}
|
||||
|
||||
-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
|
||||
+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
|
||||
{
|
||||
+ IOThread *iothread = IOTHREAD(base);
|
||||
ERRP_GUARD();
|
||||
|
||||
+ if (!iothread->ctx) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
aio_context_set_poll_params(iothread->ctx,
|
||||
iothread->poll_max_ns,
|
||||
iothread->poll_grow,
|
||||
@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
|
||||
}
|
||||
|
||||
aio_context_set_aio_params(iothread->ctx,
|
||||
- iothread->aio_max_batch,
|
||||
+ iothread->parent_obj.aio_max_batch,
|
||||
errp);
|
||||
}
|
||||
|
||||
-static void iothread_complete(UserCreatable *obj, Error **errp)
|
||||
+
|
||||
+static void iothread_init(EventLoopBase *base, Error **errp)
|
||||
{
|
||||
Error *local_error = NULL;
|
||||
- IOThread *iothread = IOTHREAD(obj);
|
||||
+ IOThread *iothread = IOTHREAD(base);
|
||||
char *thread_name;
|
||||
|
||||
iothread->stopping = false;
|
||||
@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
|
||||
*/
|
||||
iothread_init_gcontext(iothread);
|
||||
|
||||
- iothread_set_aio_context_params(iothread, &local_error);
|
||||
+ iothread_set_aio_context_params(base, &local_error);
|
||||
if (local_error) {
|
||||
error_propagate(errp, local_error);
|
||||
aio_context_unref(iothread->ctx);
|
||||
@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
|
||||
* to inherit.
|
||||
*/
|
||||
thread_name = g_strdup_printf("IO %s",
|
||||
- object_get_canonical_path_component(OBJECT(obj)));
|
||||
+ object_get_canonical_path_component(OBJECT(base)));
|
||||
qemu_thread_create(&iothread->thread, thread_name, iothread_run,
|
||||
iothread, QEMU_THREAD_JOINABLE);
|
||||
g_free(thread_name);
|
||||
@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = {
|
||||
static IOThreadParamInfo poll_shrink_info = {
|
||||
"poll-shrink", offsetof(IOThread, poll_shrink),
|
||||
};
|
||||
-static IOThreadParamInfo aio_max_batch_info = {
|
||||
- "aio-max-batch", offsetof(IOThread, aio_max_batch),
|
||||
-};
|
||||
|
||||
static void iothread_get_param(Object *obj, Visitor *v,
|
||||
const char *name, IOThreadParamInfo *info, Error **errp)
|
||||
@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
|
||||
}
|
||||
}
|
||||
|
||||
-static void iothread_get_aio_param(Object *obj, Visitor *v,
|
||||
- const char *name, void *opaque, Error **errp)
|
||||
-{
|
||||
- IOThreadParamInfo *info = opaque;
|
||||
-
|
||||
- iothread_get_param(obj, v, name, info, errp);
|
||||
-}
|
||||
-
|
||||
-static void iothread_set_aio_param(Object *obj, Visitor *v,
|
||||
- const char *name, void *opaque, Error **errp)
|
||||
-{
|
||||
- IOThread *iothread = IOTHREAD(obj);
|
||||
- IOThreadParamInfo *info = opaque;
|
||||
-
|
||||
- if (!iothread_set_param(obj, v, name, info, errp)) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (iothread->ctx) {
|
||||
- aio_context_set_aio_params(iothread->ctx,
|
||||
- iothread->aio_max_batch,
|
||||
- errp);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static void iothread_class_init(ObjectClass *klass, void *class_data)
|
||||
{
|
||||
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
|
||||
- ucc->complete = iothread_complete;
|
||||
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
|
||||
+
|
||||
+ bc->init = iothread_init;
|
||||
+ bc->update_params = iothread_set_aio_context_params;
|
||||
|
||||
object_class_property_add(klass, "poll-max-ns", "int",
|
||||
iothread_get_poll_param,
|
||||
@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
|
||||
iothread_get_poll_param,
|
||||
iothread_set_poll_param,
|
||||
NULL, &poll_shrink_info);
|
||||
- object_class_property_add(klass, "aio-max-batch", "int",
|
||||
- iothread_get_aio_param,
|
||||
- iothread_set_aio_param,
|
||||
- NULL, &aio_max_batch_info);
|
||||
}
|
||||
|
||||
static const TypeInfo iothread_info = {
|
||||
.name = TYPE_IOTHREAD,
|
||||
- .parent = TYPE_OBJECT,
|
||||
+ .parent = TYPE_EVENT_LOOP_BASE,
|
||||
.class_init = iothread_class_init,
|
||||
.instance_size = sizeof(IOThread),
|
||||
.instance_init = iothread_instance_init,
|
||||
.instance_finalize = iothread_instance_finalize,
|
||||
- .interfaces = (InterfaceInfo[]) {
|
||||
- {TYPE_USER_CREATABLE},
|
||||
- {}
|
||||
- },
|
||||
};
|
||||
|
||||
static void iothread_register_types(void)
|
||||
@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque)
|
||||
info->poll_max_ns = iothread->poll_max_ns;
|
||||
info->poll_grow = iothread->poll_grow;
|
||||
info->poll_shrink = iothread->poll_shrink;
|
||||
- info->aio_max_batch = iothread->aio_max_batch;
|
||||
+ info->aio_max_batch = iothread->parent_obj.aio_max_batch;
|
||||
|
||||
QAPI_LIST_APPEND(*tail, info);
|
||||
return 0;
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 6f7e430f0f..b9c919a55e 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -2804,6 +2804,7 @@ subdir('qom')
|
||||
subdir('authz')
|
||||
subdir('crypto')
|
||||
subdir('ui')
|
||||
+subdir('hw')
|
||||
|
||||
|
||||
if enable_modules
|
||||
@@ -2811,6 +2812,18 @@ if enable_modules
|
||||
modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
|
||||
endif
|
||||
|
||||
+qom_ss = qom_ss.apply(config_host, strict: false)
|
||||
+libqom = static_library('qom', qom_ss.sources() + genh,
|
||||
+ dependencies: [qom_ss.dependencies()],
|
||||
+ name_suffix: 'fa')
|
||||
+qom = declare_dependency(link_whole: libqom)
|
||||
+
|
||||
+event_loop_base = files('event-loop-base.c')
|
||||
+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh,
|
||||
+ build_by_default: true)
|
||||
+event_loop_base = declare_dependency(link_whole: event_loop_base,
|
||||
+ dependencies: [qom])
|
||||
+
|
||||
stub_ss = stub_ss.apply(config_all, strict: false)
|
||||
|
||||
util_ss.add_all(trace_ss)
|
||||
@@ -2897,7 +2910,6 @@ subdir('monitor')
|
||||
subdir('net')
|
||||
subdir('replay')
|
||||
subdir('semihosting')
|
||||
-subdir('hw')
|
||||
subdir('tcg')
|
||||
subdir('fpu')
|
||||
subdir('accel')
|
||||
@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
|
||||
capture: true,
|
||||
command: [undefsym, nm, '@INPUT@'])
|
||||
|
||||
-qom_ss = qom_ss.apply(config_host, strict: false)
|
||||
-libqom = static_library('qom', qom_ss.sources() + genh,
|
||||
- dependencies: [qom_ss.dependencies()],
|
||||
- name_suffix: 'fa')
|
||||
-
|
||||
-qom = declare_dependency(link_whole: libqom)
|
||||
-
|
||||
authz_ss = authz_ss.apply(config_host, strict: false)
|
||||
libauthz = static_library('authz', authz_ss.sources() + genh,
|
||||
dependencies: [authz_ss.dependencies()],
|
||||
@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
|
||||
build_by_default: false)
|
||||
|
||||
blockdev = declare_dependency(link_whole: [libblockdev],
|
||||
- dependencies: [block])
|
||||
+ dependencies: [block, event_loop_base])
|
||||
|
||||
qmp_ss = qmp_ss.apply(config_host, strict: false)
|
||||
libqmp = static_library('qmp', qmp_ss.sources() + genh,
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index eeb5395ff3..a2439533c5 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -499,6 +499,20 @@
|
||||
'*repeat': 'bool',
|
||||
'*grab-toggle': 'GrabToggleKeys' } }
|
||||
|
||||
+##
|
||||
+# @EventLoopBaseProperties:
|
||||
+#
|
||||
+# Common properties for event loops
|
||||
+#
|
||||
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
|
||||
+# 0 means that the engine will use its default.
|
||||
+# (default: 0)
|
||||
+#
|
||||
+# Since: 7.1
|
||||
+##
|
||||
+{ 'struct': 'EventLoopBaseProperties',
|
||||
+ 'data': { '*aio-max-batch': 'int' } }
|
||||
+
|
||||
##
|
||||
# @IothreadProperties:
|
||||
#
|
||||
@@ -516,17 +530,15 @@
|
||||
# algorithm detects it is spending too long polling without
|
||||
# encountering events. 0 selects a default behaviour (default: 0)
|
||||
#
|
||||
-# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
|
||||
-# 0 means that the engine will use its default
|
||||
-# (default:0, since 6.1)
|
||||
+# The @aio-max-batch option is available since 6.1.
|
||||
#
|
||||
# Since: 2.0
|
||||
##
|
||||
{ 'struct': 'IothreadProperties',
|
||||
+ 'base': 'EventLoopBaseProperties',
|
||||
'data': { '*poll-max-ns': 'int',
|
||||
'*poll-grow': 'int',
|
||||
- '*poll-shrink': 'int',
|
||||
- '*aio-max-batch': 'int' } }
|
||||
+ '*poll-shrink': 'int' } }
|
||||
|
||||
##
|
||||
# @MemoryBackendProperties:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,82 @@
|
||||
From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:17:23 -0500
|
||||
Subject: [PATCH 30/31] KVM: keep track of running ioctls
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 138: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 1979276
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
|
||||
|
||||
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:57 2022 -0500
|
||||
|
||||
KVM: keep track of running ioctls
|
||||
|
||||
Using the new accel-blocker API, mark where ioctls are being called
|
||||
in KVM. Next, we will implement the critical section that will take
|
||||
care of performing memslots modifications atomically, therefore
|
||||
preventing any new ioctl from running and allowing the running ones
|
||||
to finish.
|
||||
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index f99b0becd8..ff660fd469 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
|
||||
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
|
||||
|
||||
s->sigmask_len = 8;
|
||||
+ accel_blocker_init();
|
||||
|
||||
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
||||
QTAILQ_INIT(&s->kvm_sw_breakpoints);
|
||||
@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vm_ioctl(type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(s->vmfd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
|
||||
+ accel_cpu_ioctl_begin(cpu);
|
||||
ret = ioctl(cpu->kvm_fd, type, arg);
|
||||
+ accel_cpu_ioctl_end(cpu);
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_device_ioctl(fd, type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(fd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,420 +0,0 @@
|
||||
From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Fri, 13 May 2022 03:28:31 -0300
|
||||
Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce
|
||||
io_flush callback
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
|
||||
RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 1968509
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
|
||||
Add flags to io_writev and introduce io_flush as optional callback to
|
||||
QIOChannelClass, allowing the implementation of zero copy writes by
|
||||
subclasses.
|
||||
|
||||
How to use them:
|
||||
- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
|
||||
- Wait for write completion with qio_channel_flush().
|
||||
|
||||
Notes:
|
||||
As some zero copy write implementations work asynchronously, it's
|
||||
recommended to keep the write buffer untouched until the return of
|
||||
qio_channel_flush(), to avoid the risk of sending an updated buffer
|
||||
instead of the buffer state during write.
|
||||
|
||||
As io_flush callback is optional, if a subclass does not implement it, then:
|
||||
- io_flush will return 0 without changing anything.
|
||||
|
||||
Also, some functions like qio_channel_writev_full_all() were adapted to
|
||||
receive a flag parameter. That allows shared code between zero copy and
|
||||
non-zero copy writev, and also an easier implementation of new flags.
|
||||
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Message-Id: <20220513062836.965425-3-leobras@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
chardev/char-io.c | 2 +-
|
||||
hw/remote/mpqemu-link.c | 2 +-
|
||||
include/io/channel.h | 38 +++++++++++++++++++++-
|
||||
io/channel-buffer.c | 1 +
|
||||
io/channel-command.c | 1 +
|
||||
io/channel-file.c | 1 +
|
||||
io/channel-socket.c | 2 ++
|
||||
io/channel-tls.c | 1 +
|
||||
io/channel-websock.c | 1 +
|
||||
io/channel.c | 49 +++++++++++++++++++++++------
|
||||
migration/rdma.c | 1 +
|
||||
scsi/pr-manager-helper.c | 2 +-
|
||||
tests/unit/test-io-channel-socket.c | 1 +
|
||||
13 files changed, 88 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-io.c b/chardev/char-io.c
|
||||
index 8ced184160..4451128cba 100644
|
||||
--- a/chardev/char-io.c
|
||||
+++ b/chardev/char-io.c
|
||||
@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc,
|
||||
|
||||
ret = qio_channel_writev_full(
|
||||
ioc, &iov, 1,
|
||||
- fds, nfds, NULL);
|
||||
+ fds, nfds, 0, NULL);
|
||||
if (ret == QIO_CHANNEL_ERR_BLOCK) {
|
||||
if (offset) {
|
||||
return offset;
|
||||
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
|
||||
index 7e841820e5..e8f556bd27 100644
|
||||
--- a/hw/remote/mpqemu-link.c
|
||||
+++ b/hw/remote/mpqemu-link.c
|
||||
@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
|
||||
}
|
||||
|
||||
if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
|
||||
- fds, nfds, errp)) {
|
||||
+ fds, nfds, 0, errp)) {
|
||||
ret = true;
|
||||
} else {
|
||||
trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);
|
||||
diff --git a/include/io/channel.h b/include/io/channel.h
|
||||
index 88988979f8..c680ee7480 100644
|
||||
--- a/include/io/channel.h
|
||||
+++ b/include/io/channel.h
|
||||
@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
|
||||
|
||||
#define QIO_CHANNEL_ERR_BLOCK -2
|
||||
|
||||
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
|
||||
+
|
||||
typedef enum QIOChannelFeature QIOChannelFeature;
|
||||
|
||||
enum QIOChannelFeature {
|
||||
QIO_CHANNEL_FEATURE_FD_PASS,
|
||||
QIO_CHANNEL_FEATURE_SHUTDOWN,
|
||||
QIO_CHANNEL_FEATURE_LISTEN,
|
||||
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
|
||||
};
|
||||
|
||||
|
||||
@@ -104,6 +107,7 @@ struct QIOChannelClass {
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
ssize_t (*io_readv)(QIOChannel *ioc,
|
||||
const struct iovec *iov,
|
||||
@@ -136,6 +140,8 @@ struct QIOChannelClass {
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
void *opaque);
|
||||
+ int (*io_flush)(QIOChannel *ioc,
|
||||
+ Error **errp);
|
||||
};
|
||||
|
||||
/* General I/O handling functions */
|
||||
@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
* @niov: the length of the @iov array
|
||||
* @fds: an array of file handles to send
|
||||
* @nfds: number of file handles in @fds
|
||||
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
* Write data to the IO channel, reading it from the
|
||||
@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
|
||||
* @niov: the length of the @iov array
|
||||
* @fds: an array of file handles to send
|
||||
* @nfds: number of file handles in @fds
|
||||
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
*
|
||||
@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
|
||||
* to be written, yielding from the current coroutine
|
||||
* if required.
|
||||
*
|
||||
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
|
||||
+ * instead of waiting for all requested data to be written,
|
||||
+ * this function will wait until it's all queued for writing.
|
||||
+ * In this case, if the buffer gets changed between queueing and
|
||||
+ * sending, the updated buffer will be sent. If this is not a
|
||||
+ * desired behavior, it's suggested to call qio_channel_flush()
|
||||
+ * before reusing the buffer.
|
||||
+ *
|
||||
* Returns: 0 if all bytes were written, or -1 on error
|
||||
*/
|
||||
|
||||
@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
|
||||
const struct iovec *iov,
|
||||
size_t niov,
|
||||
int *fds, size_t nfds,
|
||||
- Error **errp);
|
||||
+ int flags, Error **errp);
|
||||
+
|
||||
+/**
|
||||
+ * qio_channel_flush:
|
||||
+ * @ioc: the channel object
|
||||
+ * @errp: pointer to a NULL-initialized error object
|
||||
+ *
|
||||
+ * Will block until every packet queued with
|
||||
+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
|
||||
+ * is sent, or return in case of any error.
|
||||
+ *
|
||||
+ * If not implemented, acts as a no-op, and returns 0.
|
||||
+ *
|
||||
+ * Returns -1 if any error is found,
|
||||
+ * 1 if every send failed to use zero copy.
|
||||
+ * 0 otherwise.
|
||||
+ */
|
||||
+
|
||||
+int qio_channel_flush(QIOChannel *ioc,
|
||||
+ Error **errp);
|
||||
|
||||
#endif /* QIO_CHANNEL_H */
|
||||
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
|
||||
index baa4e2b089..bf52011be2 100644
|
||||
--- a/io/channel-buffer.c
|
||||
+++ b/io/channel-buffer.c
|
||||
@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
|
||||
diff --git a/io/channel-command.c b/io/channel-command.c
|
||||
index 338da73ade..54560464ae 100644
|
||||
--- a/io/channel-command.c
|
||||
+++ b/io/channel-command.c
|
||||
@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
|
||||
diff --git a/io/channel-file.c b/io/channel-file.c
|
||||
index d7cf6d278f..ef6807a6be 100644
|
||||
--- a/io/channel-file.c
|
||||
+++ b/io/channel-file.c
|
||||
@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index 7a8d9f69c9..a1be2197ca 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
diff --git a/io/channel-tls.c b/io/channel-tls.c
|
||||
index 2ae1b92fc0..4ce890a538 100644
|
||||
--- a/io/channel-tls.c
|
||||
+++ b/io/channel-tls.c
|
||||
@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
|
||||
diff --git a/io/channel-websock.c b/io/channel-websock.c
|
||||
index 55145a6a8c..9619906ac3 100644
|
||||
--- a/io/channel-websock.c
|
||||
+++ b/io/channel-websock.c
|
||||
@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
|
||||
diff --git a/io/channel.c b/io/channel.c
|
||||
index e8b019dc36..0640941ac5 100644
|
||||
--- a/io/channel.c
|
||||
+++ b/io/channel.c
|
||||
@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
|
||||
|
||||
- if ((fds || nfds) &&
|
||||
- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
|
||||
+ if (fds || nfds) {
|
||||
+ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
|
||||
+ error_setg_errno(errp, EINVAL,
|
||||
+ "Channel does not support file descriptor passing");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
|
||||
+ error_setg_errno(errp, EINVAL,
|
||||
+ "Zero Copy does not support file descriptor passing");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
|
||||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
|
||||
error_setg_errno(errp, EINVAL,
|
||||
- "Channel does not support file descriptor passing");
|
||||
+ "Requested Zero Copy feature is not available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
- return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
|
||||
+ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
Error **errp)
|
||||
{
|
||||
- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
|
||||
+ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
|
||||
}
|
||||
|
||||
int qio_channel_writev_full_all(QIOChannel *ioc,
|
||||
const struct iovec *iov,
|
||||
size_t niov,
|
||||
int *fds, size_t nfds,
|
||||
- Error **errp)
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
int ret = -1;
|
||||
struct iovec *local_iov = g_new(struct iovec, niov);
|
||||
@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
|
||||
|
||||
while (nlocal_iov > 0) {
|
||||
ssize_t len;
|
||||
- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
|
||||
- errp);
|
||||
+
|
||||
+ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
|
||||
+ nfds, flags, errp);
|
||||
+
|
||||
if (len == QIO_CHANNEL_ERR_BLOCK) {
|
||||
if (qemu_in_coroutine()) {
|
||||
qio_channel_yield(ioc, G_IO_OUT);
|
||||
@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
Error **errp)
|
||||
{
|
||||
- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
|
||||
+ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc,
|
||||
Error **errp)
|
||||
{
|
||||
struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
|
||||
- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
|
||||
+ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
|
||||
return klass->io_seek(ioc, offset, whence, errp);
|
||||
}
|
||||
|
||||
+int qio_channel_flush(QIOChannel *ioc,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
|
||||
+
|
||||
+ if (!klass->io_flush ||
|
||||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ return klass->io_flush(ioc, errp);
|
||||
+}
|
||||
+
|
||||
|
||||
static void qio_channel_restart_read(void *opaque)
|
||||
{
|
||||
diff --git a/migration/rdma.c b/migration/rdma.c
|
||||
index ef1e65ec36..672d1958a9 100644
|
||||
--- a/migration/rdma.c
|
||||
+++ b/migration/rdma.c
|
||||
@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int *fds,
|
||||
size_t nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
|
||||
diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
|
||||
index 451c7631b7..3be52a98d5 100644
|
||||
--- a/scsi/pr-manager-helper.c
|
||||
+++ b/scsi/pr-manager-helper.c
|
||||
@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
|
||||
iov.iov_base = (void *)buf;
|
||||
iov.iov_len = sz;
|
||||
n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
|
||||
- nfds ? &fd : NULL, nfds, errp);
|
||||
+ nfds ? &fd : NULL, nfds, 0, errp);
|
||||
|
||||
if (n_written <= 0) {
|
||||
assert(n_written != QIO_CHANNEL_ERR_BLOCK);
|
||||
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
|
||||
index c49eec1f03..6713886d02 100644
|
||||
--- a/tests/unit/test-io-channel-socket.c
|
||||
+++ b/tests/unit/test-io-channel-socket.c
|
||||
@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void)
|
||||
G_N_ELEMENTS(iosend),
|
||||
fdsend,
|
||||
G_N_ELEMENTS(fdsend),
|
||||
+ 0,
|
||||
&error_abort);
|
||||
|
||||
qio_channel_readv_full(dst,
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From cb6dc39a5e5d2d981b4b1e983042b3fbb529d5d1 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Thu, 4 Aug 2022 04:10:43 -0300
|
||||
Subject: [PATCH 06/11] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 111: zero-copy-send fixes & improvements
|
||||
RH-Commit: [6/6] 2eb1aba8ebf267a6f67cfba2e489dc88619c7fd4 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2107466
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
|
||||
For using MSG_ZEROCOPY, there are two steps:
|
||||
1 - io_writev() the packet, which enqueues the packet for sending, and
|
||||
2 - io_flush(), which gets confirmation that all packets got correctly sent
|
||||
|
||||
Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will
|
||||
be reported in (1), but it will fail the first time (2) happens.
|
||||
|
||||
This happens because (2) currently checks for cmsg_level & cmsg_type
|
||||
associated with IPV4 only, before reporting any error.
|
||||
|
||||
Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable
|
||||
support for MSG_ZEROCOPY + IPV6
|
||||
|
||||
Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
io/channel-socket.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index eb7baa2184..efd5f60808 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
|
||||
}
|
||||
|
||||
cm = CMSG_FIRSTHDR(&msg);
|
||||
- if (cm->cmsg_level != SOL_IP &&
|
||||
- cm->cmsg_type != IP_RECVERR) {
|
||||
+ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR &&
|
||||
+ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
|
||||
error_setg_errno(errp, EPROTOTYPE,
|
||||
"Wrong cmsg in errqueue");
|
||||
return -1;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,65 +0,0 @@
|
||||
From 678981c6bb7c964e1591f6f8aba49e9602f64852 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Mon, 11 Jul 2022 18:11:11 -0300
|
||||
Subject: [PATCH 01/11] QIOChannelSocket: Fix zero-copy flush returning code 1
|
||||
when nothing sent
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 111: zero-copy-send fixes & improvements
|
||||
RH-Commit: [1/6] cebc887cb61de1572d8ae3232cde45e80c339404 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2107466
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
|
||||
If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently
|
||||
returns 1. This return code should be used only when Linux fails to use
|
||||
MSG_ZEROCOPY on a lot of sendmsg().
|
||||
|
||||
Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY)
|
||||
was attempted.
|
||||
|
||||
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <20220711211112.18951-2-leobras@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
io/channel-socket.c | 8 +++++++-
|
||||
1 file changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index 8ae8b212cf..eb7baa2184 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
|
||||
struct cmsghdr *cm;
|
||||
char control[CMSG_SPACE(sizeof(*serr))];
|
||||
int received;
|
||||
- int ret = 1;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
|
||||
+ return 0;
|
||||
+ }
|
||||
|
||||
msg.msg_control = control;
|
||||
msg.msg_controllen = sizeof(control);
|
||||
memset(control, 0, sizeof(control));
|
||||
|
||||
+ ret = 1;
|
||||
+
|
||||
while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
|
||||
received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
|
||||
if (received < 0) {
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,58 +0,0 @@
|
||||
From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 02:39:43 -0300
|
||||
Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush
|
||||
works
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
|
||||
RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 1968509
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
|
||||
Somewhere between v6 and v7 of the zero-copy-send patchset a crucial
|
||||
part of the flushing mechanism went missing: incrementing zero_copy_queued.
|
||||
|
||||
Without that, the flushing interface becomes a no-op, and there is no
|
||||
guarantee the buffer is really sent.
|
||||
|
||||
This can get as bad as causing RAM corruption during migration.
|
||||
|
||||
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
|
||||
Reported-by: 徐闯 <xuchuangxclwt@bytedance.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
io/channel-socket.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index 7490e5943d..8ae8b212cf 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
"Unable to write to socket");
|
||||
return -1;
|
||||
}
|
||||
+
|
||||
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
|
||||
+ sioc->zero_copy_queued++;
|
||||
+ }
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
#else /* WIN32 */
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,249 +0,0 @@
|
||||
From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Fri, 13 May 2022 03:28:32 -0300
|
||||
Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag &
|
||||
io_flush for CONFIG_LINUX
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
|
||||
RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 1968509
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
|
||||
For CONFIG_LINUX, implement the new zero copy flag and the optional callback
|
||||
io_flush on QIOChannelSocket, but enable it only when MSG_ZEROCOPY
|
||||
feature is available in the host kernel, which is checked on
|
||||
qio_channel_socket_connect_sync()
|
||||
|
||||
qio_channel_socket_flush() was implemented by counting how many times
|
||||
sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the
|
||||
socket's error queue, in order to find how many of them finished sending.
|
||||
Flush will loop until those counters are the same, or until some error occurs.
|
||||
|
||||
Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY:
|
||||
1: Buffer
|
||||
- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
|
||||
some caution is necessary to avoid overwriting any buffer before it's sent.
|
||||
If something like this happens, a newer version of the buffer may be sent instead.
|
||||
- If this is a problem, it's recommended to call qio_channel_flush() before freeing
|
||||
or re-using the buffer.
|
||||
|
||||
2: Locked memory
|
||||
- When using MSG_ZEROCOPY, the buffer memory will be locked after being queued, and
|
||||
unlocked after it's sent.
|
||||
- Depending on the size of each buffer, and how often it's sent, it may require
|
||||
a larger amount of locked memory than usually available to non-root user.
|
||||
- If the required amount of locked memory is not available, writev_zero_copy
|
||||
will return an error, which can abort an operation like migration.
|
||||
- Because of this, when user code wants to add zero copy as a feature, it
|
||||
requires a mechanism to disable it, so it can still be accessible to less
|
||||
privileged users.
|
||||
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Message-Id: <20220513062836.965425-4-leobras@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
include/io/channel-socket.h | 2 +
|
||||
io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 114 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
|
||||
index e747e63514..513c428fe4 100644
|
||||
--- a/include/io/channel-socket.h
|
||||
+++ b/include/io/channel-socket.h
|
||||
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
|
||||
socklen_t localAddrLen;
|
||||
struct sockaddr_storage remoteAddr;
|
||||
socklen_t remoteAddrLen;
|
||||
+ ssize_t zero_copy_queued;
|
||||
+ ssize_t zero_copy_sent;
|
||||
};
|
||||
|
||||
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index a1be2197ca..fbd2214d20 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -26,6 +26,14 @@
|
||||
#include "io/channel-watch.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/clone-visitor.h"
|
||||
+#ifdef CONFIG_LINUX
|
||||
+#include <linux/errqueue.h>
|
||||
+#include <sys/socket.h>
|
||||
+
|
||||
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
|
||||
+#define QEMU_MSG_ZEROCOPY
|
||||
+#endif
|
||||
+#endif
|
||||
|
||||
#define SOCKET_MAX_FDS 16
|
||||
|
||||
@@ -55,6 +63,8 @@ qio_channel_socket_new(void)
|
||||
|
||||
sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
|
||||
sioc->fd = -1;
|
||||
+ sioc->zero_copy_queued = 0;
|
||||
+ sioc->zero_copy_sent = 0;
|
||||
|
||||
ioc = QIO_CHANNEL(sioc);
|
||||
qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
|
||||
@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
|
||||
return -1;
|
||||
}
|
||||
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
+ int ret, v = 1;
|
||||
+ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
|
||||
+ if (ret == 0) {
|
||||
+ /* Zero copy available on host */
|
||||
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
|
||||
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
|
||||
size_t fdsize = sizeof(int) * nfds;
|
||||
struct cmsghdr *cmsg;
|
||||
+ int sflags = 0;
|
||||
|
||||
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
|
||||
|
||||
@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
memcpy(CMSG_DATA(cmsg), fds, fdsize);
|
||||
}
|
||||
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
|
||||
+ sflags = MSG_ZEROCOPY;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
retry:
|
||||
- ret = sendmsg(sioc->fd, &msg, 0);
|
||||
+ ret = sendmsg(sioc->fd, &msg, sflags);
|
||||
if (ret <= 0) {
|
||||
- if (errno == EAGAIN) {
|
||||
+ switch (errno) {
|
||||
+ case EAGAIN:
|
||||
return QIO_CHANNEL_ERR_BLOCK;
|
||||
- }
|
||||
- if (errno == EINTR) {
|
||||
+ case EINTR:
|
||||
goto retry;
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
+ case ENOBUFS:
|
||||
+ if (sflags & MSG_ZEROCOPY) {
|
||||
+ error_setg_errno(errp, errno,
|
||||
+ "Process can't lock enough memory for using MSG_ZEROCOPY");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ break;
|
||||
+#endif
|
||||
}
|
||||
+
|
||||
error_setg_errno(errp, errno,
|
||||
"Unable to write to socket");
|
||||
return -1;
|
||||
@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
+
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
+static int qio_channel_socket_flush(QIOChannel *ioc,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
+ struct msghdr msg = {};
|
||||
+ struct sock_extended_err *serr;
|
||||
+ struct cmsghdr *cm;
|
||||
+ char control[CMSG_SPACE(sizeof(*serr))];
|
||||
+ int received;
|
||||
+ int ret = 1;
|
||||
+
|
||||
+ msg.msg_control = control;
|
||||
+ msg.msg_controllen = sizeof(control);
|
||||
+ memset(control, 0, sizeof(control));
|
||||
+
|
||||
+ while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
|
||||
+ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
|
||||
+ if (received < 0) {
|
||||
+ switch (errno) {
|
||||
+ case EAGAIN:
|
||||
+ /* Nothing on errqueue, wait until something is available */
|
||||
+ qio_channel_wait(ioc, G_IO_ERR);
|
||||
+ continue;
|
||||
+ case EINTR:
|
||||
+ continue;
|
||||
+ default:
|
||||
+ error_setg_errno(errp, errno,
|
||||
+ "Unable to read errqueue");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ cm = CMSG_FIRSTHDR(&msg);
|
||||
+ if (cm->cmsg_level != SOL_IP &&
|
||||
+ cm->cmsg_type != IP_RECVERR) {
|
||||
+ error_setg_errno(errp, EPROTOTYPE,
|
||||
+ "Wrong cmsg in errqueue");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ serr = (void *) CMSG_DATA(cm);
|
||||
+ if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
|
||||
+ error_setg_errno(errp, serr->ee_errno,
|
||||
+ "Error on socket");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
|
||||
+ error_setg_errno(errp, serr->ee_origin,
|
||||
+ "Error not from zero copy");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* No errors, count successfully finished sendmsg()*/
|
||||
+ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
|
||||
+
|
||||
+ /* If any sendmsg() succeeded using zero copy, return 0 at the end */
|
||||
+ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+#endif /* QEMU_MSG_ZEROCOPY */
|
||||
+
|
||||
static int
|
||||
qio_channel_socket_set_blocking(QIOChannel *ioc,
|
||||
bool enabled,
|
||||
@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
|
||||
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
|
||||
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
|
||||
ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
+ ioc_klass->io_flush = qio_channel_socket_flush;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static const TypeInfo qio_channel_socket_info = {
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,82 +0,0 @@
|
||||
From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 02:39:42 -0300
|
||||
Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to
|
||||
improve readability
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
|
||||
RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm)
|
||||
RH-Bugzilla: 1968509
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
|
||||
During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were
|
||||
introduced, particularly at qio_channel_socket_writev().
|
||||
|
||||
Rewrite some of those changes so it's easier to read.
|
||||
|
||||
Also, introduce an assert to help detect incorrect zero-copy usage when
|
||||
it's disabled on build.
|
||||
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached
|
||||
(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
io/channel-socket.c | 14 +++++++++-----
|
||||
1 file changed, 9 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index fbd2214d20..7490e5943d 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
memcpy(CMSG_DATA(cmsg), fds, fdsize);
|
||||
}
|
||||
|
||||
-#ifdef QEMU_MSG_ZEROCOPY
|
||||
if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
|
||||
+#ifdef QEMU_MSG_ZEROCOPY
|
||||
sflags = MSG_ZEROCOPY;
|
||||
- }
|
||||
+#else
|
||||
+ /*
|
||||
+ * We expect QIOChannel class entry point to have
|
||||
+ * blocked this code path already
|
||||
+ */
|
||||
+ g_assert_not_reached();
|
||||
#endif
|
||||
+ }
|
||||
|
||||
retry:
|
||||
ret = sendmsg(sioc->fd, &msg, sflags);
|
||||
@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
return QIO_CHANNEL_ERR_BLOCK;
|
||||
case EINTR:
|
||||
goto retry;
|
||||
-#ifdef QEMU_MSG_ZEROCOPY
|
||||
case ENOBUFS:
|
||||
- if (sflags & MSG_ZEROCOPY) {
|
||||
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
|
||||
error_setg_errno(errp, errno,
|
||||
"Process can't lock enough memory for using MSG_ZEROCOPY");
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
-#endif
|
||||
}
|
||||
|
||||
error_setg_errno(errp, errno,
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,237 +0,0 @@
|
||||
From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Jones <drjones@redhat.com>
|
||||
Date: Wed, 15 Jun 2022 15:28:27 +0200
|
||||
Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
|
||||
RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu)
|
||||
RH-Bugzilla: 2060839
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824
|
||||
Upstream Status: RHEL only
|
||||
|
||||
We only need to support AArch64 cpu types and we only need three
|
||||
types:
|
||||
1) A base type to use with TCG, i.e. a cpu type with only base
|
||||
features. 'cortex-a57' serves this role and is currently used
|
||||
by libguestfs.
|
||||
2) The 'max' type, which is for both KVM and TCG and is good for
|
||||
tests that just specify 'max' but run under both. 'max' with
|
||||
TCG also provides the VM with all the CPU features TCG
|
||||
supports, which is good for VMs that need features not
|
||||
provided by the basic cortex-a57.
|
||||
3) The host type which is used with KVM.
|
||||
|
||||
Signed-off-by: Andrew Jones <drjones@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 4 ++++
|
||||
target/arm/cpu64.c | 6 ++++++
|
||||
target/arm/cpu_tcg.c | 12 ++----------
|
||||
tests/qtest/arm-cpu-features.c | 6 ++++++
|
||||
4 files changed, 18 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 95d012d6eb..74119976d3 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -239,12 +239,16 @@ static const int a15irqmap[] = {
|
||||
};
|
||||
|
||||
static const char *valid_cpus[] = {
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
ARM_CPU_TYPE_NAME("cortex-a7"),
|
||||
ARM_CPU_TYPE_NAME("cortex-a15"),
|
||||
ARM_CPU_TYPE_NAME("cortex-a53"),
|
||||
+#endif /* disabled for RHEL */
|
||||
ARM_CPU_TYPE_NAME("cortex-a57"),
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
ARM_CPU_TYPE_NAME("cortex-a72"),
|
||||
ARM_CPU_TYPE_NAME("a64fx"),
|
||||
+#endif /* disabled for RHEL */
|
||||
ARM_CPU_TYPE_NAME("host"),
|
||||
ARM_CPU_TYPE_NAME("max"),
|
||||
};
|
||||
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
|
||||
index eb44c05822..e80b831073 100644
|
||||
--- a/target/arm/cpu64.c
|
||||
+++ b/target/arm/cpu64.c
|
||||
@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj)
|
||||
define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
|
||||
}
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static void aarch64_a53_initfn(Object *obj)
|
||||
{
|
||||
ARMCPU *cpu = ARM_CPU(obj);
|
||||
@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj)
|
||||
cpu->gic_vprebits = 5;
|
||||
define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
|
||||
{
|
||||
@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj)
|
||||
qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property);
|
||||
}
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static void aarch64_a64fx_initfn(Object *obj)
|
||||
{
|
||||
ARMCPU *cpu = ARM_CPU(obj);
|
||||
@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj)
|
||||
|
||||
/* TODO: Add A64FX specific HPC extension registers */
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
static const ARMCPUInfo aarch64_cpus[] = {
|
||||
{ .name = "cortex-a57", .initfn = aarch64_a57_initfn },
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
|
||||
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
|
||||
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
|
||||
+#endif /* disabled for RHEL */
|
||||
{ .name = "max", .initfn = aarch64_max_initfn },
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
|
||||
{ .name = "host", .initfn = aarch64_host_initfn },
|
||||
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
|
||||
index 3826fa5122..74727fc92c 100644
|
||||
--- a/target/arm/cpu_tcg.c
|
||||
+++ b/target/arm/cpu_tcg.c
|
||||
@@ -19,10 +19,10 @@
|
||||
#include "hw/boards.h"
|
||||
#endif
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
/* CPU models. These are not needed for the AArch64 linux-user build. */
|
||||
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
|
||||
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
|
||||
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
|
||||
{
|
||||
@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj)
|
||||
cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
|
||||
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
|
||||
@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static void cortex_a7_initfn(Object *obj)
|
||||
{
|
||||
ARMCPU *cpu = ARM_CPU(obj);
|
||||
@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj)
|
||||
cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
|
||||
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
static void cortex_a15_initfn(Object *obj)
|
||||
{
|
||||
@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj)
|
||||
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
|
||||
}
|
||||
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static void cortex_m0_initfn(Object *obj)
|
||||
{
|
||||
ARMCPU *cpu = ARM_CPU(obj);
|
||||
@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
|
||||
|
||||
cc->gdb_core_xml_file = "arm-m-profile.xml";
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
#ifndef TARGET_AARCH64
|
||||
/*
|
||||
@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj)
|
||||
#endif /* !TARGET_AARCH64 */
|
||||
|
||||
static const ARMCPUInfo arm_tcg_cpus[] = {
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
{ .name = "arm926", .initfn = arm926_initfn },
|
||||
{ .name = "arm946", .initfn = arm946_initfn },
|
||||
{ .name = "arm1026", .initfn = arm1026_initfn },
|
||||
@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
|
||||
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
|
||||
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
|
||||
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
|
||||
-#endif /* disabled for RHEL */
|
||||
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
|
||||
.class_init = arm_v7m_class_init },
|
||||
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
|
||||
@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
|
||||
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
|
||||
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
|
||||
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
|
||||
-#endif /* disabled for RHEL */
|
||||
#ifndef TARGET_AARCH64
|
||||
{ .name = "max", .initfn = arm_max_initfn },
|
||||
#endif
|
||||
@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void)
|
||||
type_init(arm_tcg_cpu_register_types)
|
||||
|
||||
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
|
||||
+#endif /* disabled for RHEL */
|
||||
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
|
||||
index f76652143a..fe2a0a070d 100644
|
||||
--- a/tests/qtest/arm-cpu-features.c
|
||||
+++ b/tests/qtest/arm-cpu-features.c
|
||||
@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data)
|
||||
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
|
||||
|
||||
/* Test expected feature presence/absence for some cpu types */
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
assert_has_feature_enabled(qts, "cortex-a15", "pmu");
|
||||
assert_has_not_feature(qts, "cortex-a15", "aarch64");
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
/* Enabling and disabling pmu should always work. */
|
||||
assert_has_feature_enabled(qts, "max", "pmu");
|
||||
@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data)
|
||||
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
|
||||
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
assert_has_feature_enabled(qts, "a64fx", "pmu");
|
||||
assert_has_feature_enabled(qts, "a64fx", "aarch64");
|
||||
/*
|
||||
@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data)
|
||||
"{ 'sve384': true }");
|
||||
assert_error(qts, "a64fx", "cannot enable sve640",
|
||||
"{ 'sve640': true }");
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
sve_tests_default(qts, "max");
|
||||
pauth_tests_default(qts, "max");
|
||||
@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
|
||||
QDict *resp;
|
||||
char *error;
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
assert_error(qts, "cortex-a15",
|
||||
"We cannot guarantee the CPU type 'cortex-a15' works "
|
||||
"with KVM on this host", NULL);
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
assert_has_feature_enabled(qts, "host", "aarch64");
|
||||
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,95 +0,0 @@
|
||||
From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Jones <drjones@redhat.com>
|
||||
Date: Wed, 15 Jun 2022 15:28:27 +0200
|
||||
Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53
|
||||
to a57
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
|
||||
RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu)
|
||||
RH-Bugzilla: 2060839
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824
|
||||
Upstream Status: RHEL only
|
||||
|
||||
We plan to remove the cortex-a53 from the supported cpu types. Switch
|
||||
all avocado tests that use it to the cortex-a57, which will work the
|
||||
same and we intend to keep. We don't want to try and upstream this
|
||||
change since the better upstream change would be to switch from the
|
||||
a53 to 'max', but the upstream tests also need to use later guest
|
||||
kernels to use 'max' (see qemu upstream commit 0942820408dc
|
||||
("hw/arm/virt: Disable LPA2 for -machine virt-6.2")).
|
||||
|
||||
Signed-off-by: Andrew Jones <drjones@redhat.com>
|
||||
---
|
||||
tests/avocado/replay_kernel.py | 2 +-
|
||||
tests/avocado/reverse_debugging.py | 2 +-
|
||||
tests/avocado/tcg_plugins.py | 6 +++---
|
||||
3 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
|
||||
index 0b2b0dc692..3a7b5f0748 100644
|
||||
--- a/tests/avocado/replay_kernel.py
|
||||
+++ b/tests/avocado/replay_kernel.py
|
||||
@@ -147,7 +147,7 @@ def test_aarch64_virt(self):
|
||||
"""
|
||||
:avocado: tags=arch:aarch64
|
||||
:avocado: tags=machine:virt
|
||||
- :avocado: tags=cpu:cortex-a53
|
||||
+ :avocado: tags=cpu:cortex-a57
|
||||
"""
|
||||
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
|
||||
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
|
||||
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
|
||||
index d2921e70c3..66d185ed42 100644
|
||||
--- a/tests/avocado/reverse_debugging.py
|
||||
+++ b/tests/avocado/reverse_debugging.py
|
||||
@@ -198,7 +198,7 @@ def test_aarch64_virt(self):
|
||||
"""
|
||||
:avocado: tags=arch:aarch64
|
||||
:avocado: tags=machine:virt
|
||||
- :avocado: tags=cpu:cortex-a53
|
||||
+ :avocado: tags=cpu:cortex-a57
|
||||
"""
|
||||
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
|
||||
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
|
||||
diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py
|
||||
index 642d2e49e3..93b3afd823 100644
|
||||
--- a/tests/avocado/tcg_plugins.py
|
||||
+++ b/tests/avocado/tcg_plugins.py
|
||||
@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self):
|
||||
:avocado: tags=accel:tcg
|
||||
:avocado: tags=arch:aarch64
|
||||
:avocado: tags=machine:virt
|
||||
- :avocado: tags=cpu:cortex-a53
|
||||
+ :avocado: tags=cpu:cortex-a57
|
||||
"""
|
||||
kernel_path = self._grab_aarch64_kernel()
|
||||
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
|
||||
@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self):
|
||||
:avocado: tags=accel:tcg
|
||||
:avocado: tags=arch:aarch64
|
||||
:avocado: tags=machine:virt
|
||||
- :avocado: tags=cpu:cortex-a53
|
||||
+ :avocado: tags=cpu:cortex-a57
|
||||
"""
|
||||
kernel_path = self._grab_aarch64_kernel()
|
||||
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
|
||||
@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self):
|
||||
:avocado: tags=accel:tcg
|
||||
:avocado: tags=arch:aarch64
|
||||
:avocado: tags=machine:virt
|
||||
- :avocado: tags=cpu:cortex-a53
|
||||
+ :avocado: tags=cpu:cortex-a57
|
||||
"""
|
||||
kernel_path = self._grab_aarch64_kernel()
|
||||
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
|
||||
--
|
||||
2.35.3
|
||||
|
@ -1,58 +0,0 @@
|
||||
From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001
|
||||
From: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
Date: Thu, 9 Jun 2022 11:55:15 +0200
|
||||
Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs"
|
||||
|
||||
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs"
|
||||
RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s)
|
||||
RH-Bugzilla: 2094270
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149
|
||||
Upstream Status: RHEL-only
|
||||
Tested: with upstream kernel
|
||||
|
||||
Downstream QEMU carries a patch that sets the hard limit of possible vCPUs
|
||||
to the value that the KVM code of the kernel recommends as soft limit.
|
||||
Upstream KVM code has been changed recently to not use an arbitrary soft
|
||||
limit anymore, but to cap the value on the amount of available physical
|
||||
CPUs of the host. This defeats the purpose of the downstream change in
|
||||
QEMU completely. Drop the downstream-only patch to allow CPU overcommit.
|
||||
|
||||
This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2.
|
||||
|
||||
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 12 ------------
|
||||
1 file changed, 12 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index fdf0e4d429..5f1377ca04 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms)
|
||||
soft_vcpus_limit = kvm_recommended_vcpus(s);
|
||||
hard_vcpus_limit = kvm_max_vcpus(s);
|
||||
|
||||
-#ifdef HOST_PPC64
|
||||
- /*
|
||||
- * On POWER, the kernel advertises a soft limit based on the
|
||||
- * number of CPU threads on the host. We want to allow exceeding
|
||||
- * this for testing purposes, so we don't want to set hard limit
|
||||
- * to soft limit as on x86.
|
||||
- */
|
||||
-#else
|
||||
- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */
|
||||
- hard_vcpus_limit = soft_vcpus_limit;
|
||||
-#endif
|
||||
-
|
||||
while (nc->name) {
|
||||
if (nc->num > soft_vcpus_limit) {
|
||||
warn_report("Number of %s cpus requested (%d) exceeds "
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,134 +0,0 @@
|
||||
From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 2 Aug 2022 08:19:49 +0200
|
||||
Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()"
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 112: Fix postcopy migration on s390x
|
||||
RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9)
|
||||
RH-Bugzilla: 2099934
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
|
||||
This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3.
|
||||
|
||||
The simplification of unqueue_page() introduced a bug that sometimes
|
||||
breaks migration on s390x hosts.
|
||||
|
||||
The problem is not fully understood yet, but since we are already in
|
||||
the freeze for QEMU 7.1 and we need something working there, let's
|
||||
revert this patch for the upcoming release. The optimization can be
|
||||
redone later again in a proper way if necessary.
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20220802061949.331576-1-thuth@redhat.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1)
|
||||
Conflicts:
|
||||
migration/trace-events
|
||||
(trivial contextual conflict)
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
migration/ram.c | 37 ++++++++++++++++++++++++++-----------
|
||||
migration/trace-events | 3 ++-
|
||||
2 files changed, 28 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/migration/ram.c b/migration/ram.c
|
||||
index fb6db54642..ee40e4a718 100644
|
||||
--- a/migration/ram.c
|
||||
+++ b/migration/ram.c
|
||||
@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
|
||||
{
|
||||
struct RAMSrcPageRequest *entry;
|
||||
RAMBlock *block = NULL;
|
||||
- size_t page_size;
|
||||
|
||||
if (!postcopy_has_request(rs)) {
|
||||
return NULL;
|
||||
@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
|
||||
entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
|
||||
block = entry->rb;
|
||||
*offset = entry->offset;
|
||||
- page_size = qemu_ram_pagesize(block);
|
||||
- /* Each page request should only be multiple page size of the ramblock */
|
||||
- assert((entry->len % page_size) == 0);
|
||||
|
||||
- if (entry->len > page_size) {
|
||||
- entry->len -= page_size;
|
||||
- entry->offset += page_size;
|
||||
+ if (entry->len > TARGET_PAGE_SIZE) {
|
||||
+ entry->len -= TARGET_PAGE_SIZE;
|
||||
+ entry->offset += TARGET_PAGE_SIZE;
|
||||
} else {
|
||||
memory_region_unref(block->mr);
|
||||
QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
|
||||
@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
|
||||
migration_consume_urgent_request();
|
||||
}
|
||||
|
||||
- trace_unqueue_page(block->idstr, *offset,
|
||||
- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
|
||||
-
|
||||
return block;
|
||||
}
|
||||
|
||||
@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
|
||||
{
|
||||
RAMBlock *block;
|
||||
ram_addr_t offset;
|
||||
+ bool dirty;
|
||||
+
|
||||
+ do {
|
||||
+ block = unqueue_page(rs, &offset);
|
||||
+ /*
|
||||
+ * We're sending this page, and since it's postcopy nothing else
|
||||
+ * will dirty it, and we must make sure it doesn't get sent again
|
||||
+ * even if this queue request was received after the background
|
||||
+ * search already sent it.
|
||||
+ */
|
||||
+ if (block) {
|
||||
+ unsigned long page;
|
||||
+
|
||||
+ page = offset >> TARGET_PAGE_BITS;
|
||||
+ dirty = test_bit(page, block->bmap);
|
||||
+ if (!dirty) {
|
||||
+ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
|
||||
+ page);
|
||||
+ } else {
|
||||
+ trace_get_queued_page(block->idstr, (uint64_t)offset, page);
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- block = unqueue_page(rs, &offset);
|
||||
+ } while (block && !dirty);
|
||||
|
||||
if (!block) {
|
||||
/*
|
||||
diff --git a/migration/trace-events b/migration/trace-events
|
||||
index 1aec580e92..09d61ed1f4 100644
|
||||
--- a/migration/trace-events
|
||||
+++ b/migration/trace-events
|
||||
@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
|
||||
qemu_file_fclose(void) ""
|
||||
|
||||
# ram.c
|
||||
+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
|
||||
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
|
||||
migration_bitmap_sync_start(void) ""
|
||||
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
|
||||
migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
|
||||
@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI
|
||||
ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
|
||||
ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
|
||||
ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
|
||||
-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
|
||||
|
||||
# multifd.c
|
||||
multifd_new_send_channel_async(uint8_t id) "channel %u"
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,140 @@
|
||||
From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 19 Jan 2023 18:24:24 +0100
|
||||
Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in
|
||||
vhost_user_read()"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 146: Fix vhost-user with dpdk
|
||||
RH-Bugzilla: 2155173
|
||||
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
||||
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
|
||||
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos)
|
||||
|
||||
This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692.
|
||||
|
||||
The nested event loop is broken by design. Its only user was removed.
|
||||
Drop the code as well so that nobody ever tries to use it again.
|
||||
|
||||
I had to fix a couple of trivial conflicts around return values because
|
||||
of 025faa872bcf ("vhost-user: stick to -errno error return convention").
|
||||
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20230119172424.478268-3-groug@kaod.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
|
||||
(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f)
|
||||
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
|
||||
---
|
||||
hw/virtio/vhost-user.c | 65 ++++--------------------------------------
|
||||
1 file changed, 5 insertions(+), 60 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||
index 0ac00eb901..7cb49c50f9 100644
|
||||
--- a/hw/virtio/vhost-user.c
|
||||
+++ b/hw/virtio/vhost-user.c
|
||||
@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-struct vhost_user_read_cb_data {
|
||||
- struct vhost_dev *dev;
|
||||
- VhostUserMsg *msg;
|
||||
- GMainLoop *loop;
|
||||
- int ret;
|
||||
-};
|
||||
-
|
||||
-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
- gpointer opaque)
|
||||
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
{
|
||||
- struct vhost_user_read_cb_data *data = opaque;
|
||||
- struct vhost_dev *dev = data->dev;
|
||||
- VhostUserMsg *msg = data->msg;
|
||||
struct vhost_user *u = dev->opaque;
|
||||
CharBackend *chr = u->user->chr;
|
||||
uint8_t *p = (uint8_t *) msg;
|
||||
@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
|
||||
r = vhost_user_read_header(dev, msg);
|
||||
if (r < 0) {
|
||||
- data->ret = r;
|
||||
- goto end;
|
||||
+ return r;
|
||||
}
|
||||
|
||||
/* validate message size is sane */
|
||||
@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
error_report("Failed to read msg header."
|
||||
" Size %d exceeds the maximum %zu.", msg->hdr.size,
|
||||
VHOST_USER_PAYLOAD_SIZE);
|
||||
- data->ret = -EPROTO;
|
||||
- goto end;
|
||||
+ return -EPROTO;
|
||||
}
|
||||
|
||||
if (msg->hdr.size) {
|
||||
@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
|
||||
int saved_errno = errno;
|
||||
error_report("Failed to read msg payload."
|
||||
" Read %d instead of %d.", r, msg->hdr.size);
|
||||
- data->ret = r < 0 ? -saved_errno : -EIO;
|
||||
- goto end;
|
||||
+ return r < 0 ? -saved_errno : -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
-end:
|
||||
- g_main_loop_quit(data->loop);
|
||||
- return G_SOURCE_REMOVE;
|
||||
-}
|
||||
-
|
||||
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
-{
|
||||
- struct vhost_user *u = dev->opaque;
|
||||
- CharBackend *chr = u->user->chr;
|
||||
- GMainContext *prev_ctxt = chr->chr->gcontext;
|
||||
- GMainContext *ctxt = g_main_context_new();
|
||||
- GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
|
||||
- struct vhost_user_read_cb_data data = {
|
||||
- .dev = dev,
|
||||
- .loop = loop,
|
||||
- .msg = msg,
|
||||
- .ret = 0
|
||||
- };
|
||||
-
|
||||
- /*
|
||||
- * We want to be able to monitor the slave channel fd while waiting
|
||||
- * for chr I/O. This requires an event loop, but we can't nest the
|
||||
- * one to which chr is currently attached : its fd handlers might not
|
||||
- * be prepared for re-entrancy. So we create a new one and switch chr
|
||||
- * to use it.
|
||||
- */
|
||||
- qemu_chr_be_update_read_handlers(chr->chr, ctxt);
|
||||
- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
|
||||
-
|
||||
- g_main_loop_run(loop);
|
||||
-
|
||||
- /*
|
||||
- * Restore the previous event loop context. This also destroys/recreates
|
||||
- * event sources : this guarantees that all pending events in the original
|
||||
- * context that have been processed by the nested loop are purged.
|
||||
- */
|
||||
- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
|
||||
-
|
||||
- g_main_loop_unref(loop);
|
||||
- g_main_context_unref(ctxt);
|
||||
-
|
||||
- return data.ret;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int process_message_reply(struct vhost_dev *dev,
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,143 @@
|
||||
From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 19 Jan 2023 18:24:23 +0100
|
||||
Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in
|
||||
vhost_user_read()"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 146: Fix vhost-user with dpdk
|
||||
RH-Bugzilla: 2155173
|
||||
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
||||
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
|
||||
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos)
|
||||
|
||||
This reverts commit db8a3772e300c1a656331a92da0785d81667dc81.
|
||||
|
||||
Motivation : this is breaking vhost-user with DPDK as reported in [0].
|
||||
|
||||
Received unexpected msg type. Expected 22 received 40
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 40 received 22
|
||||
Received unexpected msg type. Expected 22 received 11
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 11 received 22
|
||||
vhost VQ 1 ring restore failed: -71: Protocol error (71)
|
||||
Received unexpected msg type. Expected 22 received 11
|
||||
Fail to update device iotlb
|
||||
Received unexpected msg type. Expected 11 received 22
|
||||
vhost VQ 0 ring restore failed: -71: Protocol error (71)
|
||||
unable to start vhost net: 71: falling back on userspace virtio
|
||||
|
||||
The failing sequence that leads to the first error is :
|
||||
- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master
|
||||
socket
|
||||
- QEMU starts a nested event loop in order to wait for the
|
||||
VHOST_USER_GET_STATUS response and to be able to process messages from
|
||||
the slave channel
|
||||
- DPDK sends a couple of legitimate IOTLB miss messages on the slave
|
||||
channel
|
||||
- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22)
|
||||
updates on the master socket
|
||||
- QEMU expects to receive a response for the latest VHOST_USER_IOTLB_MSG
|
||||
but it gets the response for the VHOST_USER_GET_STATUS instead
|
||||
|
||||
The subsequent errors have the same root cause : the nested event loop
|
||||
breaks the order by design. It lures QEMU to expect responses to the
|
||||
latest message sent on the master socket to arrive first.
|
||||
|
||||
Since this was only needed for DAX enablement which is still not merged
|
||||
upstream, just drop the code for now. A working solution will have to
|
||||
be merged later on. Likely protect the master socket with a mutex
|
||||
and service the slave channel with a separate thread, as discussed with
|
||||
Maxime in the mail thread below.
|
||||
|
||||
[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/
|
||||
|
||||
Reported-by: Yanghang Liu <yanghliu@redhat.com>
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20230119172424.478268-2-groug@kaod.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
|
||||
(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c)
|
||||
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
|
||||
---
|
||||
hw/virtio/vhost-user.c | 35 +++--------------------------------
|
||||
1 file changed, 3 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||
index 8f635844af..0ac00eb901 100644
|
||||
--- a/hw/virtio/vhost-user.c
|
||||
+++ b/hw/virtio/vhost-user.c
|
||||
@@ -356,35 +356,6 @@ end:
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
|
||||
- gpointer opaque);
|
||||
-
|
||||
-/*
|
||||
- * This updates the read handler to use a new event loop context.
|
||||
- * Event sources are removed from the previous context : this ensures
|
||||
- * that events detected in the previous context are purged. They will
|
||||
- * be re-detected and processed in the new context.
|
||||
- */
|
||||
-static void slave_update_read_handler(struct vhost_dev *dev,
|
||||
- GMainContext *ctxt)
|
||||
-{
|
||||
- struct vhost_user *u = dev->opaque;
|
||||
-
|
||||
- if (!u->slave_ioc) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (u->slave_src) {
|
||||
- g_source_destroy(u->slave_src);
|
||||
- g_source_unref(u->slave_src);
|
||||
- }
|
||||
-
|
||||
- u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
|
||||
- G_IO_IN | G_IO_HUP,
|
||||
- slave_read, dev, NULL,
|
||||
- ctxt);
|
||||
-}
|
||||
-
|
||||
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
{
|
||||
struct vhost_user *u = dev->opaque;
|
||||
@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
* be prepared for re-entrancy. So we create a new one and switch chr
|
||||
* to use it.
|
||||
*/
|
||||
- slave_update_read_handler(dev, ctxt);
|
||||
qemu_chr_be_update_read_handlers(chr->chr, ctxt);
|
||||
qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
|
||||
|
||||
@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
* context that have been processed by the nested loop are purged.
|
||||
*/
|
||||
qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
|
||||
- slave_update_read_handler(dev, NULL);
|
||||
|
||||
g_main_loop_unref(loop);
|
||||
g_main_context_unref(ctxt);
|
||||
@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
|
||||
return -ECONNREFUSED;
|
||||
}
|
||||
u->slave_ioc = ioc;
|
||||
- slave_update_read_handler(dev, NULL);
|
||||
+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
|
||||
+ G_IO_IN | G_IO_HUP,
|
||||
+ slave_read, dev, NULL, NULL);
|
||||
|
||||
if (reply_supported) {
|
||||
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,51 +0,0 @@
|
||||
From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 9 May 2022 03:46:23 -0400
|
||||
Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane
|
||||
enabled [RHEL only]"
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]"
|
||||
RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm)
|
||||
RH-Bugzilla: 1995710
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
This reverts commit 4e17b1126e.
|
||||
|
||||
Over time AioContext usage and coverage has increased, and now block
|
||||
backend is capable of handling AioContext change upon eject and insert.
|
||||
Therefore the above downstream-only commit is not necessary anymore,
|
||||
and can be safely reverted.
|
||||
|
||||
X-downstream-only: true
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
hw/scsi/virtio-scsi.c | 9 ---------
|
||||
1 file changed, 9 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
|
||||
index 2450c9438c..db54d104be 100644
|
||||
--- a/hw/scsi/virtio-scsi.c
|
||||
+++ b/hw/scsi/virtio-scsi.c
|
||||
@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
AioContext *old_context;
|
||||
int ret;
|
||||
|
||||
- /* XXX: Remove this check once block backend is capable of handling
|
||||
- * AioContext change upon eject/insert.
|
||||
- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if
|
||||
- * data plane is not used, both cases are safe for scsi-cd. */
|
||||
- if (s->ctx && s->ctx != qemu_get_aio_context() &&
|
||||
- object_dynamic_cast(OBJECT(dev), "scsi-cd")) {
|
||||
- error_setg(errp, "scsi-cd is not supported by data plane");
|
||||
- return;
|
||||
- }
|
||||
if (s->ctx && !s->dataplane_fenced) {
|
||||
if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
|
||||
return;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,348 @@
|
||||
From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:16:41 -0500
|
||||
Subject: [PATCH 29/31] accel: introduce accelerator blocker API
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 138: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 1979276
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
|
||||
|
||||
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:56 2022 -0500
|
||||
|
||||
accel: introduce accelerator blocker API
|
||||
|
||||
This API allows the accelerators to prevent vcpus from issuing
|
||||
new ioctls while executing a critical section marked with the
|
||||
accel_ioctl_inhibit_begin/end functions.
|
||||
|
||||
Note that all functions submitting ioctls must mark where the
|
||||
ioctl is being called with accel_{cpu_}ioctl_begin/end().
|
||||
|
||||
This API requires the caller to always hold the BQL.
|
||||
API documentation is in sysemu/accel-blocker.h
|
||||
|
||||
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
|
||||
(to minimize cache line bouncing) to ensure that no new ioctls
|
||||
run when the critical section starts, and a QemuEvent to wait
|
||||
that all running ioctls finish.
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
util/meson.build: "interval-tree.c" does not exist
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
|
||||
accel/meson.build | 2 +-
|
||||
hw/core/cpu-common.c | 2 +
|
||||
include/hw/core/cpu.h | 3 +
|
||||
include/sysemu/accel-blocker.h | 56 ++++++++++++
|
||||
util/meson.build | 2 +-
|
||||
6 files changed, 217 insertions(+), 2 deletions(-)
|
||||
create mode 100644 accel/accel-blocker.c
|
||||
create mode 100644 include/sysemu/accel-blocker.h
|
||||
|
||||
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
|
||||
new file mode 100644
|
||||
index 0000000000..1e7f423462
|
||||
--- /dev/null
|
||||
+++ b/accel/accel-blocker.c
|
||||
@@ -0,0 +1,154 @@
|
||||
+/*
|
||||
+ * Lock to inhibit accelerator ioctls
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
+ * of this software and associated documentation files (the "Software"), to deal
|
||||
+ * in the Software without restriction, including without limitation the rights
|
||||
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
+ * copies of the Software, and to permit persons to whom the Software is
|
||||
+ * furnished to do so, subject to the following conditions:
|
||||
+ *
|
||||
+ * The above copyright notice and this permission notice shall be included in
|
||||
+ * all copies or substantial portions of the Software.
|
||||
+ *
|
||||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
+ * THE SOFTWARE.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
+#include "hw/core/cpu.h"
|
||||
+#include "sysemu/accel-blocker.h"
|
||||
+
|
||||
+static QemuLockCnt accel_in_ioctl_lock;
|
||||
+static QemuEvent accel_in_ioctl_event;
|
||||
+
|
||||
+void accel_blocker_init(void)
|
||||
+{
|
||||
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
|
||||
+ qemu_event_init(&accel_in_ioctl_event, false);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_begin(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_end(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_begin(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_end(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+static bool accel_has_to_wait(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+ bool needs_to_wait = false;
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
|
||||
+ /* exit the ioctl, if vcpu is running it */
|
||||
+ qemu_cpu_kick(cpu);
|
||||
+ needs_to_wait = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_begin(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ /*
|
||||
+ * We allow to inhibit only when holding the BQL, so we can identify
|
||||
+ * when an inhibitor wants to issue an ioctl easily.
|
||||
+ */
|
||||
+ g_assert(qemu_mutex_iothread_locked());
|
||||
+
|
||||
+ /* Block further invocations of the ioctls outside the BQL. */
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
|
||||
+
|
||||
+ /* Keep waiting while there are running ioctls */
|
||||
+ while (true) {
|
||||
+
|
||||
+ /* Reset event to FREE. */
|
||||
+ qemu_event_reset(&accel_in_ioctl_event);
|
||||
+
|
||||
+ if (accel_has_to_wait()) {
|
||||
+ /*
|
||||
+ * If event is still FREE, and there are ioctls still in progress,
|
||||
+ * wait.
|
||||
+ *
|
||||
+ * If an ioctl finishes before qemu_event_wait(), it will change
|
||||
+ * the event state to SET. This will prevent qemu_event_wait() from
|
||||
+ * blocking, but it's not a problem because if other ioctls are
|
||||
+ * still running the loop will iterate once more and reset the event
|
||||
+ * status to FREE so that it can wait properly.
|
||||
+ *
|
||||
+ * If an ioctl finishes while qemu_event_wait() is blocking, then
|
||||
+ * it will be woken up, but also here the while loop makes sure
|
||||
+ * to re-enter the wait if there are other running ioctls.
|
||||
+ */
|
||||
+ qemu_event_wait(&accel_in_ioctl_event);
|
||||
+ } else {
|
||||
+ /* No ioctl is running */
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_end(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
diff --git a/accel/meson.build b/accel/meson.build
|
||||
index 259c35c4c8..061332610f 100644
|
||||
--- a/accel/meson.build
|
||||
+++ b/accel/meson.build
|
||||
@@ -1,4 +1,4 @@
|
||||
-specific_ss.add(files('accel-common.c'))
|
||||
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
|
||||
softmmu_ss.add(files('accel-softmmu.c'))
|
||||
user_ss.add(files('accel-user.c'))
|
||||
|
||||
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
|
||||
index f9fdd46b9d..8d6a4b1b65 100644
|
||||
--- a/hw/core/cpu-common.c
|
||||
+++ b/hw/core/cpu-common.c
|
||||
@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
|
||||
cpu->nr_threads = 1;
|
||||
|
||||
qemu_mutex_init(&cpu->work_mutex);
|
||||
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
|
||||
QSIMPLEQ_INIT(&cpu->work_list);
|
||||
QTAILQ_INIT(&cpu->breakpoints);
|
||||
QTAILQ_INIT(&cpu->watchpoints);
|
||||
@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
|
||||
{
|
||||
CPUState *cpu = CPU(obj);
|
||||
|
||||
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
|
||||
qemu_mutex_destroy(&cpu->work_mutex);
|
||||
}
|
||||
|
||||
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
||||
index 8830546121..2417597236 100644
|
||||
--- a/include/hw/core/cpu.h
|
||||
+++ b/include/hw/core/cpu.h
|
||||
@@ -398,6 +398,9 @@ struct CPUState {
|
||||
uint32_t kvm_fetch_index;
|
||||
uint64_t dirty_pages;
|
||||
|
||||
+ /* Used by accel-blocker: CPU is executing an ioctl() */
|
||||
+ QemuLockCnt in_ioctl_lock;
|
||||
+
|
||||
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
|
||||
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
|
||||
new file mode 100644
|
||||
index 0000000000..72020529ef
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/accel-blocker.h
|
||||
@@ -0,0 +1,56 @@
|
||||
+/*
|
||||
+ * Accelerator blocking API, to prevent new ioctls from starting and wait for
|
||||
+ * the running ones to finish.
|
||||
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
|
||||
+ * release the BQL.
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+#ifndef ACCEL_BLOCKER_H
|
||||
+#define ACCEL_BLOCKER_H
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/cpus.h"
|
||||
+
|
||||
+extern void accel_blocker_init(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_{cpu_}ioctl_begin/end:
|
||||
+ * Mark when ioctl is about to run or just finished.
|
||||
+ *
|
||||
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
|
||||
+ * called, preventing new ioctls from running. They will continue only after
|
||||
+ * accel_ioctl_inhibit_end().
|
||||
+ */
|
||||
+extern void accel_ioctl_begin(void);
|
||||
+extern void accel_ioctl_end(void);
|
||||
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
|
||||
+extern void accel_cpu_ioctl_end(CPUState *cpu);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_begin: start critical section
|
||||
+ *
|
||||
+ * This function makes sure that:
|
||||
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
|
||||
+ * 2) all ioctls that were already running reach
|
||||
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
|
||||
+ *
|
||||
+ * This allows the caller to access shared data or perform operations without
|
||||
+ * worrying about concurrent vcpu accesses.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_begin(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_end: end critical section started by
|
||||
+ * accel_ioctl_inhibit_begin()
|
||||
+ *
|
||||
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_end(void);
|
||||
+
|
||||
+#endif /* ACCEL_BLOCKER_H */
|
||||
diff --git a/util/meson.build b/util/meson.build
|
||||
index 25b9b61f98..85a5504c4d 100644
|
||||
--- a/util/meson.build
|
||||
+++ b/util/meson.build
|
||||
@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
|
||||
util_ss.add(files('yank.c'))
|
||||
util_ss.add(files('int128.c'))
|
||||
util_ss.add(files('memalign.c'))
|
||||
+util_ss.add(files('lockcnt.c'))
|
||||
|
||||
if have_user
|
||||
util_ss.add(files('selfmap.c'))
|
||||
@@ -71,7 +72,6 @@ endif
|
||||
if have_block or have_ga
|
||||
util_ss.add(files('aiocb.c', 'async.c'))
|
||||
util_ss.add(files('base64.c'))
|
||||
- util_ss.add(files('lockcnt.c'))
|
||||
util_ss.add(files('main-loop.c'))
|
||||
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
|
||||
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,58 @@
|
||||
From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Fri, 3 Feb 2023 18:15:10 +0100
|
||||
Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
|
||||
RH-Bugzilla: 2165280
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Shaoqin Huang <None>
|
||||
RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2165280
|
||||
Upstream: yes
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041
|
||||
Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEGV anymore
|
||||
|
||||
After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
|
||||
before registration"), it looks like the CPUJumpCache pointer can be NULL.
|
||||
This causes a SIGSEGV when running debug-wp-migration kvm unit test.
|
||||
|
||||
At the first place it should be clarified why this TCG code is called
|
||||
with KVM acceleration. This may hide another bug.
|
||||
|
||||
Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
|
||||
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
accel/tcg/cputlb.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
|
||||
index 6f1c00682b..4244b0e4e3 100644
|
||||
--- a/accel/tcg/cputlb.c
|
||||
+++ b/accel/tcg/cputlb.c
|
||||
@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
|
||||
|
||||
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
|
||||
{
|
||||
- int i, i0 = tb_jmp_cache_hash_page(page_addr);
|
||||
CPUJumpCache *jc = cpu->tb_jmp_cache;
|
||||
+ int i, i0;
|
||||
|
||||
+ if (unlikely(!jc)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ i0 = tb_jmp_cache_hash_page(page_addr);
|
||||
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
|
||||
qatomic_set(&jc->array[i0 + i].tb, NULL);
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,50 @@
|
||||
From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 11:03:52 2023 +0100
|
||||
|
||||
aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
The barrier comes after an atomic increment, so it is enough to use
|
||||
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index dd9a7f6461..da13357bb8 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
|
||||
/* Increment wait_->num_waiters before evaluating cond. */ \
|
||||
qatomic_inc(&wait_->num_waiters); \
|
||||
/* Paired with smp_mb in aio_wait_kick(). */ \
|
||||
- smp_mb(); \
|
||||
+ smp_mb__after_rmw(); \
|
||||
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
|
||||
while ((cond)) { \
|
||||
aio_poll(ctx_, true); \
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,66 @@
|
||||
From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 6229438cca037d42f44a96d38feb15cb102a444f
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:43:52 2023 +0100
|
||||
|
||||
async: clarify usage of barriers in the polling case
|
||||
|
||||
Explain that aio_context_notifier_poll() relies on
|
||||
aio_notify_accept() to catch all the memory writes that were
|
||||
done before ctx->notified was set to true.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 37d3e6036d..e0846baf93 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx)
|
||||
qatomic_set(&ctx->notified, false);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
|
||||
- * in aio_notify.
|
||||
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
|
||||
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
|
||||
+ * with smp_wmb() in aio_notify.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque)
|
||||
EventNotifier *e = opaque;
|
||||
AioContext *ctx = container_of(e, AioContext, notifier);
|
||||
|
||||
+ /*
|
||||
+ * No need for load-acquire because we just want to kick the
|
||||
+ * event loop. aio_notify_accept() takes care of synchronizing
|
||||
+ * the event loop with the producers.
|
||||
+ */
|
||||
return qatomic_read(&ctx->notified);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,111 @@
|
||||
From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 11/12] async: update documentation of the memory barriers
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:15:06 2023 +0100
|
||||
|
||||
async: update documentation of the memory barriers
|
||||
|
||||
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
|
||||
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
|
||||
is happening when the bottom half is enqueued in the bh_list; not
|
||||
when the flags are set. Update the documentation to match.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 33 +++++++++++++++++++--------------
|
||||
1 file changed, 19 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 63434ddae4..37d3e6036d 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
|
||||
unsigned old_flags;
|
||||
|
||||
/*
|
||||
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
|
||||
- * 1. idle & any writes needed by the callback are done before the
|
||||
- * locations are read in the aio_bh_poll.
|
||||
- * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
- * could be freed.
|
||||
+ * Synchronizes with qatomic_fetch_and() in aio_bh_dequeue(), ensuring that
|
||||
+ * insertion starts after BH_PENDING is set.
|
||||
*/
|
||||
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
|
||||
+
|
||||
if (!(old_flags & BH_PENDING)) {
|
||||
+ /*
|
||||
+ * At this point the bottom half becomes visible to aio_bh_poll().
|
||||
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
|
||||
+ * aio_bh_poll(), ensuring that:
|
||||
+ * 1. any writes needed by the callback are visible from the callback
|
||||
+ * after aio_bh_dequeue() returns bh.
|
||||
+ * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
+ * could be freed.
|
||||
+ */
|
||||
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
|
||||
}
|
||||
|
||||
@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
|
||||
QSLIST_REMOVE_HEAD(head, next);
|
||||
|
||||
/*
|
||||
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
|
||||
- * barrier ensures that the callback sees all writes done by the scheduling
|
||||
- * thread. It also ensures that the scheduling thread sees the cleared
|
||||
- * flag before bh->cb has run, and thus will call aio_notify again if
|
||||
- * necessary.
|
||||
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
|
||||
+ * the removal finishes before BH_PENDING is reset.
|
||||
*/
|
||||
*flags = qatomic_fetch_and(&bh->flags,
|
||||
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
|
||||
@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx)
|
||||
BHListSlice *s;
|
||||
int ret = 0;
|
||||
|
||||
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
|
||||
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
|
||||
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
|
||||
|
||||
@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
|
||||
- * aio_notify_accept.
|
||||
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
|
||||
+ * smp_mb() in aio_notify_accept().
|
||||
*/
|
||||
smp_wmb();
|
||||
qatomic_set(&ctx->notified, true);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading ctx->notify_me. Pairs
|
||||
- * with smp_mb in aio_ctx_prepare or aio_poll.
|
||||
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
|
||||
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ctx->notify_me)) {
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,250 @@
|
||||
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:06 +0100
|
||||
Subject: [PATCH 24/31] block: Call drain callbacks only once
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
We only need to call both the BlockDriver's callback and the parent
|
||||
callbacks when going from undrained to drained or vice versa. A second
|
||||
drain section doesn't make a difference for the driver or the parent,
|
||||
they weren't supposed to send new requests before and after the second
|
||||
drain.
|
||||
|
||||
One thing that gets in the way is the 'ignore_bds_parents' parameter in
|
||||
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
|
||||
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
|
||||
quiesce the parent through BdrvChildClass callbacks. If an additional
|
||||
drain section is started now, bs->quiesce_counter will be non-zero, but
|
||||
we would still need to quiesce the parent through BdrvChildClass in
|
||||
order to keep things consistent (and unquiesce it on the matching
|
||||
bdrv_drained_end(), even though the counter would not reach 0 yet as
|
||||
long as the bdrv_drain_all() section is still active).
|
||||
|
||||
Instead of keeping track of this, let's just get rid of the parameter.
|
||||
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
|
||||
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
|
||||
the root for each node, resulting in quadratic complexity. As it happens,
|
||||
calling the callbacks only once solves the same problem, so as of this
|
||||
patch, we'll still have O(n) complexity and ignore_bds_parents is not
|
||||
needed any more.
|
||||
|
||||
This patch only ignores the 'ignore_bds_parents' parameter. It will be
|
||||
removed in a separate patch.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 25 +++++++------------------
|
||||
block/io.c | 30 ++++++++++++++++++------------
|
||||
include/block/block_int-common.h | 8 ++++----
|
||||
tests/unit/test-bdrv-drain.c | 16 ++++++++++------
|
||||
4 files changed, 39 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index e0e3b21790..5a583e260d 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
{
|
||||
BlockDriverState *old_bs = child->bs;
|
||||
int new_bs_quiesce_counter;
|
||||
- int drain_saldo;
|
||||
|
||||
assert(!child->frozen);
|
||||
assert(old_bs != new_bs);
|
||||
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
|
||||
}
|
||||
|
||||
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
|
||||
-
|
||||
/*
|
||||
* If the new child node is drained but the old one was not, flush
|
||||
* all outstanding requests to the old child node.
|
||||
*/
|
||||
- while (drain_saldo > 0 && child->klass->drained_begin) {
|
||||
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
+ if (new_bs_quiesce_counter && !child->quiesced_parent) {
|
||||
bdrv_parent_drained_begin_single(child, true);
|
||||
- drain_saldo--;
|
||||
}
|
||||
|
||||
if (old_bs) {
|
||||
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
if (new_bs) {
|
||||
assert_bdrv_graph_writable(new_bs);
|
||||
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
|
||||
-
|
||||
- /*
|
||||
- * Polling in bdrv_parent_drained_begin_single() may have led to the new
|
||||
- * node's quiesce_counter having been decreased. Not a problem, we just
|
||||
- * need to recognize this here and then invoke drained_end appropriately
|
||||
- * more often.
|
||||
- */
|
||||
- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
|
||||
- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
|
||||
-
|
||||
if (child->klass->attach) {
|
||||
child->klass->attach(child);
|
||||
}
|
||||
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
/*
|
||||
* If the old child node was drained but the new one is not, allow
|
||||
* requests to come in only after the new node has been attached.
|
||||
+ *
|
||||
+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
|
||||
+ * polls, which could have changed the value.
|
||||
*/
|
||||
- while (drain_saldo < 0 && child->klass->drained_end) {
|
||||
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
+ if (!new_bs_quiesce_counter && child->quiesced_parent) {
|
||||
bdrv_parent_drained_end_single(child);
|
||||
- drain_saldo++;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 75224480d0..87d6f22ec4 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
- assert(c->parent_quiesce_counter > 0);
|
||||
- c->parent_quiesce_counter--;
|
||||
+ assert(c->quiesced_parent);
|
||||
+ c->quiesced_parent = false;
|
||||
+
|
||||
if (c->klass->drained_end) {
|
||||
c->klass->drained_end(c);
|
||||
}
|
||||
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
{
|
||||
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
IO_OR_GS_CODE();
|
||||
- c->parent_quiesce_counter++;
|
||||
+
|
||||
+ assert(!c->quiesced_parent);
|
||||
+ c->quiesced_parent = true;
|
||||
+
|
||||
if (c->klass->drained_begin) {
|
||||
c->klass->drained_begin(c);
|
||||
}
|
||||
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
aio_disable_external(bdrv_get_aio_context(bs));
|
||||
- }
|
||||
|
||||
- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
- bs->drv->bdrv_drain_begin(bs);
|
||||
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
+ bdrv_parent_drained_begin(bs, parent, false);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
- bs->drv->bdrv_drain_end(bs);
|
||||
- }
|
||||
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
-
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
if (old_quiesce_counter == 1) {
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
+ }
|
||||
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
+ bdrv_parent_drained_end(bs, parent, false);
|
||||
+
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 791dddfd7d..a6bc6b7fe9 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -980,13 +980,13 @@ struct BdrvChild {
|
||||
bool frozen;
|
||||
|
||||
/*
|
||||
- * How many times the parent of this child has been drained
|
||||
+ * True if the parent of this child has been drained by this BdrvChild
|
||||
* (through klass->drained_*).
|
||||
- * Usually, this is equal to bs->quiesce_counter (potentially
|
||||
- * reduced by bdrv_drain_all_count). It may differ while the
|
||||
+ *
|
||||
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
|
||||
* child is entering or leaving a drained section.
|
||||
*/
|
||||
- int parent_quiesce_counter;
|
||||
+ bool quiesced_parent;
|
||||
|
||||
QLIST_ENTRY(BdrvChild) next;
|
||||
QLIST_ENTRY(BdrvChild) next_parent;
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index dda08de8db..172bc6debc 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
|
||||
|
||||
do_drain_begin(drain_type, bs);
|
||||
|
||||
- g_assert_cmpint(bs->quiesce_counter, ==, 1);
|
||||
+ if (drain_type == BDRV_DRAIN_ALL) {
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 2);
|
||||
+ } else {
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
|
||||
+ }
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
|
||||
|
||||
do_drain_end(drain_type, bs);
|
||||
@@ -348,8 +352,8 @@ static void test_nested(void)
|
||||
|
||||
for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
|
||||
for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
|
||||
- int backing_quiesce = (outer != BDRV_DRAIN) +
|
||||
- (inner != BDRV_DRAIN);
|
||||
+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
|
||||
+ (inner == BDRV_DRAIN_ALL);
|
||||
|
||||
g_assert_cmpint(bs->quiesce_counter, ==, 0);
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
@@ -359,10 +363,10 @@ static void test_nested(void)
|
||||
do_drain_begin(outer, bs);
|
||||
do_drain_begin(inner, bs);
|
||||
|
||||
- g_assert_cmpint(bs->quiesce_counter, ==, 2);
|
||||
+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
|
||||
g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
|
||||
- g_assert_cmpint(s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
|
||||
+ g_assert_cmpint(s->drain_count, ==, 1);
|
||||
+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
|
||||
|
||||
do_drain_end(inner, bs);
|
||||
do_drain_end(outer, bs);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,298 @@
|
||||
From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:09 +0100
|
||||
Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
In order to make sure that bdrv_replace_child_noperm() doesn't have to
|
||||
poll any more, get rid of the bdrv_parent_drained_begin_single() call.
|
||||
|
||||
This is possible now because we can require that the parent is already
|
||||
drained through the child in question when the function is called and we
|
||||
don't call the parent drain callbacks more than once.
|
||||
|
||||
The additional drain calls needed in callers cause the test case to run
|
||||
its code in the drain handler too early (bdrv_attach_child() drains
|
||||
now), so modify it to only enable the code after the test setup has
|
||||
completed.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 103 ++++++++++++++++++++++++++++++-----
|
||||
block/io.c | 2 +-
|
||||
include/block/block-io.h | 8 +++
|
||||
tests/unit/test-bdrv-drain.c | 10 ++++
|
||||
4 files changed, 108 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index af31a94863..65588d313a 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
/* old_bs reference is transparently moved from @s to @s->child */
|
||||
+ if (!s->child->bs) {
|
||||
+ /*
|
||||
+ * The parents were undrained when removing old_bs from the child. New
|
||||
+ * requests can't have been made, though, because the child was empty.
|
||||
+ *
|
||||
+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid
|
||||
+ * undraining the parent in the first place. Once this is done, having
|
||||
+ * new_bs drained when calling bdrv_replace_child_tran() is not a
|
||||
+ * requirement any more.
|
||||
+ */
|
||||
+ bdrv_parent_drained_begin_single(s->child, false);
|
||||
+ assert(!bdrv_parent_drained_poll_single(s->child));
|
||||
+ }
|
||||
+ assert(s->child->quiesced_parent);
|
||||
bdrv_replace_child_noperm(s->child, s->old_bs);
|
||||
bdrv_unref(new_bs);
|
||||
}
|
||||
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
|
||||
*
|
||||
* Note: real unref of old_bs is done only on commit.
|
||||
*
|
||||
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
|
||||
+ * kept drained until the transaction is completed.
|
||||
+ *
|
||||
* The function doesn't update permissions, caller is responsible for this.
|
||||
*/
|
||||
static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
|
||||
Transaction *tran)
|
||||
{
|
||||
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
|
||||
+
|
||||
+ assert(child->quiesced_parent);
|
||||
+ assert(!new_bs || new_bs->quiesce_counter);
|
||||
+
|
||||
*s = (BdrvReplaceChildState) {
|
||||
.child = child,
|
||||
.old_bs = child->bs,
|
||||
@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||
return permissions[qapi_perm];
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Replaces the node that a BdrvChild points to without updating permissions.
|
||||
+ *
|
||||
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||
+ * @child.
|
||||
+ *
|
||||
+ * This function does not poll.
|
||||
+ */
|
||||
static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
BlockDriverState *new_bs)
|
||||
{
|
||||
@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
int new_bs_quiesce_counter;
|
||||
|
||||
assert(!child->frozen);
|
||||
+
|
||||
+ /*
|
||||
+ * If we want to change the BdrvChild to point to a drained node as its new
|
||||
+ * child->bs, we need to make sure that its new parent is drained, too. In
|
||||
+ * other words, either child->quiesced_parent must already be true or we must
|
||||
+ * be able to set it and keep the parent's quiesce_counter consistent with
|
||||
+ * that, but without polling or starting new requests (this function
|
||||
+ * guarantees that it doesn't poll, and starting new requests would be
|
||||
+ * against the invariants of drain sections).
|
||||
+ *
|
||||
+ * To keep things simple, we pick the first option (child->quiesced_parent
|
||||
+ * must already be true). We also generalise the rule a bit to make it
|
||||
+ * easier to verify in callers and more likely to be covered in test cases:
|
||||
+ * The parent must be quiesced through this child even if new_bs isn't
|
||||
+ * currently drained.
|
||||
+ *
|
||||
+ * The only exception is for callers that always pass new_bs == NULL. In
|
||||
+ * this case, we obviously never need to consider the case of a drained
|
||||
+ * new_bs, so we can keep the callers simpler by allowing them not to drain
|
||||
+ * the parent.
|
||||
+ */
|
||||
+ assert(!new_bs || child->quiesced_parent);
|
||||
assert(old_bs != new_bs);
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
|
||||
}
|
||||
|
||||
- /*
|
||||
- * If the new child node is drained but the old one was not, flush
|
||||
- * all outstanding requests to the old child node.
|
||||
- */
|
||||
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
- if (new_bs_quiesce_counter && !child->quiesced_parent) {
|
||||
- bdrv_parent_drained_begin_single(child, true);
|
||||
- }
|
||||
-
|
||||
if (old_bs) {
|
||||
if (child->klass->detach) {
|
||||
child->klass->detach(child);
|
||||
@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
}
|
||||
|
||||
/*
|
||||
- * If the old child node was drained but the new one is not, allow
|
||||
- * requests to come in only after the new node has been attached.
|
||||
- *
|
||||
- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
|
||||
- * polls, which could have changed the value.
|
||||
+ * If the parent was drained through this BdrvChild previously, but new_bs
|
||||
+ * is not drained, allow requests to come in only after the new node has
|
||||
+ * been attached.
|
||||
*/
|
||||
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
||||
if (!new_bs_quiesce_counter && child->quiesced_parent) {
|
||||
@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
}
|
||||
|
||||
bdrv_ref(child_bs);
|
||||
+ /*
|
||||
+ * Let every new BdrvChild start with a drained parent. Inserting the child
|
||||
+ * in the graph with bdrv_replace_child_noperm() will undrain it if
|
||||
+ * @child_bs is not drained.
|
||||
+ *
|
||||
+ * The child was only just created and is not yet visible in global state
|
||||
+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
|
||||
+ * could have sent requests and polling is not necessary.
|
||||
+ *
|
||||
+ * Note that this means that the parent isn't fully drained yet, we only
|
||||
+ * stop new requests from coming in. This is fine, we don't care about the
|
||||
+ * old requests here, they are not for this child. If another place enters a
|
||||
+ * drain section for the same parent, but wants it to be fully quiesced, it
|
||||
+ * will not run most of the code in .drained_begin() again (which is not
|
||||
+ * a problem, we already did this), but it will still poll until the parent
|
||||
+ * is fully quiesced, so it will not be negatively affected either.
|
||||
+ */
|
||||
+ bdrv_parent_drained_begin_single(new_child, false);
|
||||
bdrv_replace_child_noperm(new_child, child_bs);
|
||||
|
||||
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
|
||||
@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
|
||||
}
|
||||
|
||||
if (child->bs) {
|
||||
+ BlockDriverState *bs = child->bs;
|
||||
+ bdrv_drained_begin(bs);
|
||||
bdrv_replace_child_tran(child, NULL, tran);
|
||||
+ bdrv_drained_end(bs);
|
||||
}
|
||||
|
||||
tran_add(tran, &bdrv_remove_child_drv, child);
|
||||
}
|
||||
|
||||
+static void undrain_on_clean_cb(void *opaque)
|
||||
+{
|
||||
+ bdrv_drained_end(opaque);
|
||||
+}
|
||||
+
|
||||
+static TransactionActionDrv undrain_on_clean = {
|
||||
+ .clean = undrain_on_clean_cb,
|
||||
+};
|
||||
+
|
||||
static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
BlockDriverState *to,
|
||||
bool auto_skip, Transaction *tran,
|
||||
@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
+ bdrv_drained_begin(from);
|
||||
+ bdrv_drained_begin(to);
|
||||
+ tran_add(tran, &undrain_on_clean, from);
|
||||
+ tran_add(tran, &undrain_on_clean, to);
|
||||
+
|
||||
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
|
||||
assert(c->bs == from);
|
||||
if (!should_update_child(c, to)) {
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 5e9150d92c..ae64830eac 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
|
||||
}
|
||||
}
|
||||
|
||||
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
|
||||
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
|
||||
{
|
||||
if (c->klass->drained_poll) {
|
||||
return c->klass->drained_poll(c);
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 8f5e75756a..65e6d2569b 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
||||
*/
|
||||
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
|
||||
+/**
|
||||
+ * bdrv_parent_drained_poll_single:
|
||||
+ *
|
||||
+ * Returns true if there is any pending activity to cease before @c can be
|
||||
+ * called quiesced, false otherwise.
|
||||
+ */
|
||||
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
|
||||
+
|
||||
/**
|
||||
* bdrv_parent_drained_end_single:
|
||||
*
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 172bc6debc..2686a8acee 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
|
||||
|
||||
|
||||
typedef struct BDRVReplaceTestState {
|
||||
+ bool setup_completed;
|
||||
bool was_drained;
|
||||
bool was_undrained;
|
||||
bool has_read;
|
||||
@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
+ if (!s->setup_completed) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (!s->drain_count) {
|
||||
s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
|
||||
bdrv_inc_in_flight(bs);
|
||||
@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
+ if (!s->setup_completed) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
g_assert(s->drain_count > 0);
|
||||
if (!--s->drain_count) {
|
||||
s->was_undrained = true;
|
||||
@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
|
||||
bdrv_ref(old_child_bs);
|
||||
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
|
||||
BDRV_CHILD_COW, &error_abort);
|
||||
+ parent_s->setup_completed = true;
|
||||
|
||||
for (i = 0; i < old_drain_count; i++) {
|
||||
bdrv_drained_begin(old_child_bs);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,54 @@
|
||||
From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:03 +0100
|
||||
Subject: [PATCH 21/31] block: Don't use subtree drains in
|
||||
bdrv_drop_intermediate()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Instead of using a subtree drain from the top node (which also drains
|
||||
child nodes of base that we're not even interested in), use a normal
|
||||
drain for base, which automatically drains all of the parents, too.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-9-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index cb5e96b1cf..b3449a312e 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
bdrv_ref(top);
|
||||
- bdrv_subtree_drained_begin(top);
|
||||
+ bdrv_drained_begin(base);
|
||||
|
||||
if (!top->drv || !base->drv) {
|
||||
goto exit;
|
||||
@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- bdrv_subtree_drained_end(top);
|
||||
+ bdrv_drained_end(base);
|
||||
bdrv_unref(top);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,157 @@
|
||||
From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:02 +0100
|
||||
Subject: [PATCH 20/31] block: Drain individual nodes during reopen
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_reopen() and friends use subtree drains as a lazy way of covering
|
||||
all the nodes they touch. Turns out that this lazy way is a lot more
|
||||
complicated than just draining the nodes individually, even not
|
||||
accounting for the additional complexity in the drain mechanism itself.
|
||||
|
||||
Simplify the code by switching to draining the individual nodes that are
|
||||
already managed in the BlockReopenQueue anyway.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-8-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 16 +++++++++-------
|
||||
block/replication.c | 6 ------
|
||||
blockdev.c | 13 -------------
|
||||
3 files changed, 9 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 46df410b07..cb5e96b1cf 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
* returns a pointer to bs_queue, which is either the newly allocated
|
||||
* bs_queue, or the existing bs_queue being used.
|
||||
*
|
||||
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
|
||||
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
|
||||
*
|
||||
* To be called with bs->aio_context locked.
|
||||
*/
|
||||
@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
int flags;
|
||||
QemuOpts *opts;
|
||||
|
||||
- /* Make sure that the caller remembered to use a drained section. This is
|
||||
- * important to avoid graph changes between the recursive queuing here and
|
||||
- * bdrv_reopen_multiple(). */
|
||||
- assert(bs->quiesce_counter > 0);
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
+ bdrv_drained_begin(bs);
|
||||
+
|
||||
if (bs_queue == NULL) {
|
||||
bs_queue = g_new0(BlockReopenQueue, 1);
|
||||
QTAILQ_INIT(bs_queue);
|
||||
@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
|
||||
if (bs_queue) {
|
||||
BlockReopenQueueEntry *bs_entry, *next;
|
||||
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
|
||||
+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
|
||||
+
|
||||
+ aio_context_acquire(ctx);
|
||||
+ bdrv_drained_end(bs_entry->state.bs);
|
||||
+ aio_context_release(ctx);
|
||||
+
|
||||
qobject_unref(bs_entry->state.explicit_options);
|
||||
qobject_unref(bs_entry->state.options);
|
||||
g_free(bs_entry);
|
||||
@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
aio_context_acquire(ctx);
|
||||
}
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/replication.c b/block/replication.c
|
||||
index f1eed25e43..c62f48a874 100644
|
||||
--- a/block/replication.c
|
||||
+++ b/block/replication.c
|
||||
@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
|
||||
s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
|
||||
}
|
||||
|
||||
- bdrv_subtree_drained_begin(hidden_disk->bs);
|
||||
- bdrv_subtree_drained_begin(secondary_disk->bs);
|
||||
-
|
||||
if (s->orig_hidden_read_only) {
|
||||
QDict *opts = qdict_new();
|
||||
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
|
||||
@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
|
||||
aio_context_acquire(ctx);
|
||||
}
|
||||
}
|
||||
-
|
||||
- bdrv_subtree_drained_end(hidden_disk->bs);
|
||||
- bdrv_subtree_drained_end(secondary_disk->bs);
|
||||
}
|
||||
|
||||
static void backup_job_cleanup(BlockDriverState *bs)
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 3f1dec6242..8ffb3d9537 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -3547,8 +3547,6 @@ fail:
|
||||
void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
{
|
||||
BlockReopenQueue *queue = NULL;
|
||||
- GSList *drained = NULL;
|
||||
- GSList *p;
|
||||
|
||||
/* Add each one of the BDS that we want to reopen to the queue */
|
||||
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
|
||||
@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
ctx = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(ctx);
|
||||
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
queue = bdrv_reopen_queue(queue, bs, qdict, false);
|
||||
- drained = g_slist_prepend(drained, bs);
|
||||
|
||||
aio_context_release(ctx);
|
||||
}
|
||||
@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
|
||||
|
||||
fail:
|
||||
bdrv_reopen_queue_free(queue);
|
||||
- for (p = drained; p; p = p->next) {
|
||||
- BlockDriverState *bs = p->data;
|
||||
- AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
-
|
||||
- aio_context_acquire(ctx);
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
- aio_context_release(ctx);
|
||||
- }
|
||||
- g_slist_free(drained);
|
||||
}
|
||||
|
||||
void qmp_blockdev_del(const char *node_name, Error **errp)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,96 @@
|
||||
From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:08 +0100
|
||||
Subject: [PATCH 26/31] block: Drop out of coroutine in
|
||||
bdrv_do_drained_begin_quiesce()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
The next patch adds a parent drain to bdrv_attach_child_common(), which
|
||||
shouldn't be, but is currently called from coroutines in some cases (e.g.
|
||||
.bdrv_co_create implementations generally open new nodes). Therefore,
|
||||
the assertion that we're not in a coroutine doesn't hold true any more.
|
||||
|
||||
We could just remove the assertion because there is nothing in the
|
||||
function that should be in conflict with running in a coroutine, but
|
||||
just to be on the safe side, we can reverse the caller relationship
|
||||
between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
|
||||
that the latter also just drops out of coroutine context and we can
|
||||
still be certain in the future that any drain code doesn't run in
|
||||
coroutines.
|
||||
|
||||
As a nice side effect, the structure of bdrv_do_drained_begin() is now
|
||||
symmetrical with bdrv_do_drained_end().
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-14-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/io.c | 25 ++++++++++++-------------
|
||||
1 file changed, 12 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e9503df6a..5e9150d92c 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool poll)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- assert(!qemu_in_coroutine());
|
||||
+
|
||||
+ if (qemu_in_coroutine()) {
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
bs->drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
}
|
||||
-}
|
||||
-
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool poll)
|
||||
-{
|
||||
- if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- bdrv_do_drained_begin_quiesce(bs, parent);
|
||||
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
}
|
||||
}
|
||||
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
+{
|
||||
+ bdrv_do_drained_begin(bs, parent, false);
|
||||
+}
|
||||
+
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,67 @@
|
||||
From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:01 +0100
|
||||
Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Callers don't agree whether bdrv_reopen_queue_child() should be called
|
||||
with the AioContext lock held or not. Standardise on holding the lock
|
||||
(as done by QMP blockdev-reopen and the replication block driver) and
|
||||
fix bdrv_reopen() to do the same.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-7-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 7999fd08c5..46df410b07 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
* bs_queue, or the existing bs_queue being used.
|
||||
*
|
||||
* bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
|
||||
+ *
|
||||
+ * To be called with bs->aio_context locked.
|
||||
*/
|
||||
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
BlockDriverState *bs,
|
||||
@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
|
||||
return bs_queue;
|
||||
}
|
||||
|
||||
+/* To be called with bs->aio_context locked */
|
||||
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
|
||||
BlockDriverState *bs,
|
||||
QDict *options, bool keep_old_opts)
|
||||
@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
bdrv_subtree_drained_begin(bs);
|
||||
+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
+
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
aio_context_release(ctx);
|
||||
}
|
||||
-
|
||||
- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
|
||||
ret = bdrv_reopen_multiple(queue, errp);
|
||||
|
||||
if (ctx != qemu_get_aio_context()) {
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,132 @@
|
||||
From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:53 +0200
|
||||
Subject: [PATCH 06/20] block: Improve empty format-specific info dump
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When a block driver supports obtaining format-specific information, but
|
||||
that object only contains optional fields, it is possible that none of
|
||||
them are present, so that dump_qobject() (called by
|
||||
bdrv_image_info_specific_dump()) will not print anything.
|
||||
|
||||
The callers of bdrv_image_info_specific_dump() put a header above this
|
||||
information ("Format specific information:\n"), which will look strange
|
||||
when there is nothing below. Modify bdrv_image_info_specific_dump() to
|
||||
print this header instead of its callers, and only if there is indeed
|
||||
something to be printed.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-2-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++----
|
||||
include/block/qapi.h | 3 ++-
|
||||
qemu-io-cmds.c | 4 ++--
|
||||
3 files changed, 41 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index cf557e3aea..51202b470a 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict)
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
|
||||
+/*
|
||||
+ * Return whether dumping the given QObject with dump_qobject() would
|
||||
+ * yield an empty dump, i.e. not print anything.
|
||||
+ */
|
||||
+static bool qobject_is_empty_dump(const QObject *obj)
|
||||
+{
|
||||
+ switch (qobject_type(obj)) {
|
||||
+ case QTYPE_QNUM:
|
||||
+ case QTYPE_QSTRING:
|
||||
+ case QTYPE_QBOOL:
|
||||
+ return false;
|
||||
+
|
||||
+ case QTYPE_QDICT:
|
||||
+ return qdict_size(qobject_to(QDict, obj)) == 0;
|
||||
+
|
||||
+ case QTYPE_QLIST:
|
||||
+ return qlist_empty(qobject_to(QList, obj));
|
||||
+
|
||||
+ default:
|
||||
+ abort();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * Dumps the given ImageInfoSpecific object in a human-readable form,
|
||||
+ * prepending an optional prefix if the dump is not empty.
|
||||
+ */
|
||||
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
+ const char *prefix)
|
||||
{
|
||||
QObject *obj, *data;
|
||||
Visitor *v = qobject_output_visitor_new(&obj);
|
||||
@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
|
||||
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
|
||||
visit_complete(v, &obj);
|
||||
data = qdict_get(qobject_to(QDict, obj), "data");
|
||||
- dump_qobject(1, data);
|
||||
+ if (!qobject_is_empty_dump(data)) {
|
||||
+ if (prefix) {
|
||||
+ qemu_printf("%s", prefix);
|
||||
+ }
|
||||
+ dump_qobject(1, data);
|
||||
+ }
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
}
|
||||
@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info)
|
||||
}
|
||||
|
||||
if (info->has_format_specific) {
|
||||
- qemu_printf("Format specific information:\n");
|
||||
- bdrv_image_info_specific_dump(info->format_specific);
|
||||
+ bdrv_image_info_specific_dump(info->format_specific,
|
||||
+ "Format specific information:\n");
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 22c7807c89..c09859ea78 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec);
|
||||
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
+ const char *prefix);
|
||||
void bdrv_image_info_dump(ImageInfo *info);
|
||||
#endif
|
||||
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
|
||||
index 952dc940f1..f4a374528e 100644
|
||||
--- a/qemu-io-cmds.c
|
||||
+++ b/qemu-io-cmds.c
|
||||
@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
|
||||
return -EIO;
|
||||
}
|
||||
if (spec_info) {
|
||||
- printf("Format specific information:\n");
|
||||
- bdrv_image_info_specific_dump(spec_info);
|
||||
+ bdrv_image_info_specific_dump(spec_info,
|
||||
+ "Format specific information:\n");
|
||||
qapi_free_ImageInfoSpecific(spec_info);
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,81 @@
|
||||
From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:00 +0100
|
||||
Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_drain_invoke() has now two entirely separate cases that share no
|
||||
code any more and are selected depending on a bool parameter. Each case
|
||||
has only one caller. Just inline the function.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-6-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/io.c | 23 ++++++-----------------
|
||||
1 file changed, 6 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index f4ca62b034..a25103be6f 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -242,21 +242,6 @@ typedef struct {
|
||||
bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
|
||||
-{
|
||||
- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
- (!begin && !bs->drv->bdrv_drain_end)) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (begin) {
|
||||
- bs->drv->bdrv_drain_begin(bs);
|
||||
- } else {
|
||||
- bs->drv->bdrv_drain_end(bs);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
BdrvChild *ignore_parent, bool ignore_bds_parents)
|
||||
@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- bdrv_drain_invoke(bs, true);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- bdrv_drain_invoke(bs, false);
|
||||
+ if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
+ }
|
||||
bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,433 @@
|
||||
From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:40:59 +0100
|
||||
Subject: [PATCH 17/31] block: Remove drained_end_counter
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
drained_end_counter is unused now, nobody changes its value any more. It
|
||||
can be removed.
|
||||
|
||||
In cases where we had two almost identical functions that only differed
|
||||
in whether the caller passes drained_end_counter, or whether they would
|
||||
poll for a local drained_end_counter to reach 0, these become a single
|
||||
function.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221118174110.55183-5-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 5 +-
|
||||
block/block-backend.c | 4 +-
|
||||
block/io.c | 98 ++++++++------------------------
|
||||
blockjob.c | 2 +-
|
||||
include/block/block-io.h | 24 --------
|
||||
include/block/block_int-common.h | 6 +-
|
||||
6 files changed, 30 insertions(+), 109 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 16a62a329c..7999fd08c5 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
return bdrv_drain_poll(bs, false, NULL, false);
|
||||
}
|
||||
|
||||
-static void bdrv_child_cb_drained_end(BdrvChild *child,
|
||||
- int *drained_end_counter)
|
||||
+static void bdrv_child_cb_drained_end(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- bdrv_drained_end_no_poll(bs, drained_end_counter);
|
||||
+ bdrv_drained_end(bs);
|
||||
}
|
||||
|
||||
static int bdrv_child_cb_inactivate(BdrvChild *child)
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index d98a96ff37..feaf2181fa 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
|
||||
}
|
||||
static void blk_root_drained_begin(BdrvChild *child);
|
||||
static bool blk_root_drained_poll(BdrvChild *child);
|
||||
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
|
||||
+static void blk_root_drained_end(BdrvChild *child);
|
||||
|
||||
static void blk_root_change_media(BdrvChild *child, bool load);
|
||||
static void blk_root_resize(BdrvChild *child);
|
||||
@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
|
||||
return busy || !!blk->in_flight;
|
||||
}
|
||||
|
||||
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
|
||||
+static void blk_root_drained_end(BdrvChild *child)
|
||||
{
|
||||
BlockBackend *blk = child->opaque;
|
||||
assert(blk->quiesce_counter);
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index c2ed4b2af9..f4ca62b034 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
|
||||
- int *drained_end_counter)
|
||||
+void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
{
|
||||
+ IO_OR_GS_CODE();
|
||||
+
|
||||
assert(c->parent_quiesce_counter > 0);
|
||||
c->parent_quiesce_counter--;
|
||||
if (c->klass->drained_end) {
|
||||
- c->klass->drained_end(c, drained_end_counter);
|
||||
+ c->klass->drained_end(c);
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
-{
|
||||
- int drained_end_counter = 0;
|
||||
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
|
||||
- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
|
||||
-}
|
||||
-
|
||||
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents,
|
||||
- int *drained_end_counter)
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
BdrvChild *c;
|
||||
|
||||
@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
continue;
|
||||
}
|
||||
- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
|
||||
+ bdrv_parent_drained_end_single(c);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,12 +240,10 @@ typedef struct {
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
bool ignore_bds_parents;
|
||||
- int *drained_end_counter;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
|
||||
- int *drained_end_counter)
|
||||
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
|
||||
{
|
||||
if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
(!begin && !bs->drv->bdrv_drain_end)) {
|
||||
@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
BdrvChild *parent, bool ignore_bds_parents,
|
||||
bool poll);
|
||||
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- int *drained_end_counter);
|
||||
+ BdrvChild *parent, bool ignore_bds_parents);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- assert(!data->drained_end_counter);
|
||||
bdrv_do_drained_begin(bs, data->recursive, data->parent,
|
||||
data->ignore_bds_parents, data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
bdrv_do_drained_end(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents,
|
||||
- data->drained_end_counter);
|
||||
+ data->ignore_bds_parents);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
bool begin, bool recursive,
|
||||
BdrvChild *parent,
|
||||
bool ignore_bds_parents,
|
||||
- bool poll,
|
||||
- int *drained_end_counter)
|
||||
+ bool poll)
|
||||
{
|
||||
BdrvCoDrainData data;
|
||||
Coroutine *self = qemu_coroutine_self();
|
||||
@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.parent = parent,
|
||||
.ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
- .drained_end_counter = drained_end_counter,
|
||||
};
|
||||
|
||||
if (bs) {
|
||||
@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
|
||||
- bdrv_drain_invoke(bs, true, NULL);
|
||||
+ bdrv_drain_invoke(bs, true);
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
|
||||
- poll, NULL);
|
||||
+ poll);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
- * functions. The *drained_end_counter pointee will be incremented
|
||||
- * once for every background operation scheduled, and decremented once
|
||||
- * the operation settles. Therefore, the pointer must remain valid
|
||||
- * until the pointee reaches 0. That implies that whoever sets up the
|
||||
- * pointee has to poll until it is 0.
|
||||
- *
|
||||
- * We use atomic operations to access *drained_end_counter, because
|
||||
- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
|
||||
- * @bs may contain nodes in different AioContexts,
|
||||
- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
|
||||
- * regardless of which AioContext they are in.
|
||||
+ * functions.
|
||||
*/
|
||||
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- int *drained_end_counter)
|
||||
+ BdrvChild *parent, bool ignore_bds_parents)
|
||||
{
|
||||
BdrvChild *child;
|
||||
int old_quiesce_counter;
|
||||
|
||||
- assert(drained_end_counter != NULL);
|
||||
-
|
||||
if (qemu_in_coroutine()) {
|
||||
bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
|
||||
- false, drained_end_counter);
|
||||
+ false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
|
||||
/* Re-enable things in child-to-parent order */
|
||||
- bdrv_drain_invoke(bs, false, drained_end_counter);
|
||||
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
|
||||
- drained_end_counter);
|
||||
+ bdrv_drain_invoke(bs, false);
|
||||
+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
|
||||
|
||||
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
|
||||
if (old_quiesce_counter == 1) {
|
||||
@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
assert(!ignore_bds_parents);
|
||||
bs->recursive_quiesce_counter--;
|
||||
QLIST_FOREACH(child, &bs->children, next) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
|
||||
- drained_end_counter);
|
||||
+ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
-}
|
||||
-
|
||||
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
|
||||
-{
|
||||
- IO_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_subtree_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
+ bdrv_do_drained_end(bs, true, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
|
||||
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
int i;
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, false,
|
||||
- &drained_end_counter);
|
||||
+ bdrv_do_drained_end(child->bs, true, child, false);
|
||||
}
|
||||
-
|
||||
- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void)
|
||||
|
||||
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
{
|
||||
- int drained_end_counter = 0;
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
g_assert(bs->quiesce_counter > 0);
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, true);
|
||||
}
|
||||
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
|
||||
}
|
||||
|
||||
void bdrv_drain_all_end(void)
|
||||
{
|
||||
BlockDriverState *bs = NULL;
|
||||
- int drained_end_counter = 0;
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
/*
|
||||
@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
|
||||
+ bdrv_do_drained_end(bs, false, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
|
||||
- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
|
||||
-
|
||||
assert(bdrv_drain_all_count > 0);
|
||||
bdrv_drain_all_count--;
|
||||
}
|
||||
diff --git a/blockjob.c b/blockjob.c
|
||||
index f51d4e18f3..0ab721e139 100644
|
||||
--- a/blockjob.c
|
||||
+++ b/blockjob.c
|
||||
@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
|
||||
}
|
||||
}
|
||||
|
||||
-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
|
||||
+static void child_job_drained_end(BdrvChild *c)
|
||||
{
|
||||
BlockJob *job = c->opaque;
|
||||
job_resume(&job->job);
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index b099d7db45..054e964c9b 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||
int64_t bytes, BdrvRequestFlags read_flags,
|
||||
BdrvRequestFlags write_flags);
|
||||
|
||||
-/**
|
||||
- * bdrv_drained_end_no_poll:
|
||||
- *
|
||||
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
|
||||
- * actually become unquiesced. Therefore, no graph changes will occur
|
||||
- * with this function.
|
||||
- *
|
||||
- * *drained_end_counter is incremented for every background operation
|
||||
- * that is scheduled, and will be decremented for every operation once
|
||||
- * it settles. The caller must poll until it reaches 0. The counter
|
||||
- * should be accessed using atomic operations only.
|
||||
- */
|
||||
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
|
||||
-
|
||||
-
|
||||
/*
|
||||
* "I/O or GS" API functions. These functions can run without
|
||||
* the BQL, but only in one specific iothread/main loop.
|
||||
@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
* bdrv_parent_drained_end_single:
|
||||
*
|
||||
* End a quiesced section for the parent of @c.
|
||||
- *
|
||||
- * This polls @bs's AioContext until all scheduled sub-drained_ends
|
||||
- * have settled, which may result in graph changes.
|
||||
*/
|
||||
void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
|
||||
@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
* bdrv_drained_end:
|
||||
*
|
||||
* End a quiescent section started by bdrv_drained_begin().
|
||||
- *
|
||||
- * This polls @bs's AioContext until all scheduled sub-drained_ends
|
||||
- * have settled. On one hand, that may result in graph changes. On
|
||||
- * the other, this requires that the caller either runs in the main
|
||||
- * loop; or that all involved nodes (@bs and all of its parents) are
|
||||
- * in the caller's AioContext.
|
||||
*/
|
||||
void bdrv_drained_end(BlockDriverState *bs);
|
||||
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 40d646d1ed..2b97576f6d 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -939,15 +939,11 @@ struct BdrvChildClass {
|
||||
* These functions must not change the graph (and therefore also must not
|
||||
* call aio_poll(), which could change the graph indirectly).
|
||||
*
|
||||
- * If drained_end() schedules background operations, it must atomically
|
||||
- * increment *drained_end_counter for each such operation and atomically
|
||||
- * decrement it once the operation has settled.
|
||||
- *
|
||||
* Note that this can be nested. If drained_begin() was called twice, new
|
||||
* I/O is allowed only after drained_end() was called twice, too.
|
||||
*/
|
||||
void (*drained_begin)(BdrvChild *child);
|
||||
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
|
||||
+ void (*drained_end)(BdrvChild *child);
|
||||
|
||||
/*
|
||||
* Returns whether the parent has pending requests for the child. This
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,274 @@
|
||||
From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:07 +0100
|
||||
Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from
|
||||
drain_begin/end.
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
ignore_bds_parents is now ignored during drain_begin and drain_end, so
|
||||
we can just remove it there. It is still a valid optimisation for
|
||||
drain_all in bdrv_drained_poll(), so leave it around there.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-13-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/io.c | 58 +++++++++++++++-------------------------
|
||||
include/block/block-io.h | 3 +--
|
||||
3 files changed, 24 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 5a583e260d..af31a94863 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
|
||||
static void bdrv_child_cb_drained_begin(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- bdrv_do_drained_begin_quiesce(bs, NULL, false);
|
||||
+ bdrv_do_drained_begin_quiesce(bs, NULL);
|
||||
}
|
||||
|
||||
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 87d6f22ec4..2e9503df6a 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
|
||||
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
|
||||
|
||||
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
|
||||
{
|
||||
BdrvChild *c, *next;
|
||||
|
||||
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
|
||||
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
+ if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
bdrv_parent_drained_begin_single(c, false);
|
||||
@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
|
||||
{
|
||||
BdrvChild *c;
|
||||
|
||||
QLIST_FOREACH(c, &bs->parents, next_parent) {
|
||||
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
|
||||
+ if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
bdrv_parent_drained_end_single(c);
|
||||
@@ -242,7 +240,6 @@ typedef struct {
|
||||
bool begin;
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
- bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents, bool poll);
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents);
|
||||
+ bool poll);
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
|
||||
- data->poll);
|
||||
+ bdrv_do_drained_begin(bs, data->parent, data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
|
||||
+ bdrv_do_drained_end(bs, data->parent);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
bool begin,
|
||||
BdrvChild *parent,
|
||||
- bool ignore_bds_parents,
|
||||
bool poll)
|
||||
{
|
||||
BdrvCoDrainData data;
|
||||
@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.done = false,
|
||||
.begin = begin,
|
||||
.parent = parent,
|
||||
- .ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
};
|
||||
|
||||
@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
- BdrvChild *parent, bool ignore_bds_parents)
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
assert(!qemu_in_coroutine());
|
||||
@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
/* Stop things in parent-to-child order */
|
||||
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
aio_disable_external(bdrv_get_aio_context(bs));
|
||||
-
|
||||
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
- bdrv_parent_drained_begin(bs, parent, false);
|
||||
+ bdrv_parent_drained_begin(bs, parent);
|
||||
if (bs->drv && bs->drv->bdrv_drain_begin) {
|
||||
bs->drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents, bool poll)
|
||||
+ bool poll)
|
||||
{
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
|
||||
return;
|
||||
}
|
||||
|
||||
- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
|
||||
+ bdrv_do_drained_begin_quiesce(bs, parent);
|
||||
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
* nodes.
|
||||
*/
|
||||
if (poll) {
|
||||
- assert(!ignore_bds_parents);
|
||||
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
|
||||
}
|
||||
}
|
||||
@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, NULL, false, true);
|
||||
+ bdrv_do_drained_begin(bs, NULL, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
* functions.
|
||||
*/
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
- bool ignore_bds_parents)
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
|
||||
{
|
||||
int old_quiesce_counter;
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
|
||||
+ bdrv_co_yield_to_drain(bs, false, parent, false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
if (bs->drv && bs->drv->bdrv_drain_end) {
|
||||
bs->drv->bdrv_drain_end(bs);
|
||||
}
|
||||
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
|
||||
- bdrv_parent_drained_end(bs, parent, false);
|
||||
-
|
||||
+ bdrv_parent_drained_end(bs, parent);
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
}
|
||||
@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, NULL, false);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, NULL, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_begin(bs, NULL, true, false);
|
||||
+ bdrv_do_drained_begin(bs, NULL, false);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 9c36a16a1f..8f5e75756a 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
|
||||
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
|
||||
* running requests to complete.
|
||||
*/
|
||||
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
- BdrvChild *parent, bool ignore_bds_parents);
|
||||
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
|
||||
|
||||
/**
|
||||
* bdrv_drained_end:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,106 @@
|
||||
From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:10 +0100
|
||||
Subject: [PATCH 28/31] block: Remove poll parameter from
|
||||
bdrv_parent_drained_begin_single()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
All callers of bdrv_parent_drained_begin_single() pass poll=false now,
|
||||
so we don't need the parameter any more.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20221118174110.55183-16-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 ++--
|
||||
block/io.c | 8 ++------
|
||||
include/block/block-io.h | 5 ++---
|
||||
3 files changed, 6 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 65588d313a..0d78711416 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
|
||||
* new_bs drained when calling bdrv_replace_child_tran() is not a
|
||||
* requirement any more.
|
||||
*/
|
||||
- bdrv_parent_drained_begin_single(s->child, false);
|
||||
+ bdrv_parent_drained_begin_single(s->child);
|
||||
assert(!bdrv_parent_drained_poll_single(s->child));
|
||||
}
|
||||
assert(s->child->quiesced_parent);
|
||||
@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
* a problem, we already did this), but it will still poll until the parent
|
||||
* is fully quiesced, so it will not be negatively affected either.
|
||||
*/
|
||||
- bdrv_parent_drained_begin_single(new_child, false);
|
||||
+ bdrv_parent_drained_begin_single(new_child);
|
||||
bdrv_replace_child_noperm(new_child, child_bs);
|
||||
|
||||
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index ae64830eac..38e57d1f67 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
|
||||
if (c == ignore) {
|
||||
continue;
|
||||
}
|
||||
- bdrv_parent_drained_begin_single(c, false);
|
||||
+ bdrv_parent_drained_begin_single(c);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
|
||||
return busy;
|
||||
}
|
||||
|
||||
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
+void bdrv_parent_drained_begin_single(BdrvChild *c)
|
||||
{
|
||||
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
assert(!c->quiesced_parent);
|
||||
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
|
||||
if (c->klass->drained_begin) {
|
||||
c->klass->drained_begin(c);
|
||||
}
|
||||
- if (poll) {
|
||||
- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
|
||||
- }
|
||||
}
|
||||
|
||||
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 65e6d2569b..92aaa7c1e9 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
||||
/**
|
||||
* bdrv_parent_drained_begin_single:
|
||||
*
|
||||
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
|
||||
- * any pending activity to cease.
|
||||
+ * Begin a quiesced section for the parent of @c.
|
||||
*/
|
||||
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
|
||||
+void bdrv_parent_drained_begin_single(BdrvChild *c);
|
||||
|
||||
/**
|
||||
* bdrv_parent_drained_poll_single:
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,896 @@
|
||||
From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:41:05 +0100
|
||||
Subject: [PATCH 23/31] block: Remove subtree drains
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Subtree drains are not used any more. Remove them.
|
||||
|
||||
After this, BdrvChildClass.attach/detach() don't poll any more.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-11-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 20 +--
|
||||
block/io.c | 121 +++-----------
|
||||
include/block/block-io.h | 18 +--
|
||||
include/block/block_int-common.h | 1 -
|
||||
include/block/block_int-io.h | 12 --
|
||||
tests/unit/test-bdrv-drain.c | 261 ++-----------------------------
|
||||
6 files changed, 44 insertions(+), 389 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 5330e89903..e0e3b21790 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
|
||||
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
|
||||
{
|
||||
BlockDriverState *bs = child->opaque;
|
||||
- return bdrv_drain_poll(bs, false, NULL, false);
|
||||
+ return bdrv_drain_poll(bs, NULL, false);
|
||||
}
|
||||
|
||||
static void bdrv_child_cb_drained_end(BdrvChild *child)
|
||||
@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
|
||||
assert(!bs->file);
|
||||
bs->file = child;
|
||||
}
|
||||
-
|
||||
- bdrv_apply_subtree_drain(child, bs);
|
||||
}
|
||||
|
||||
static void bdrv_child_cb_detach(BdrvChild *child)
|
||||
@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
|
||||
bdrv_backing_detach(child);
|
||||
}
|
||||
|
||||
- bdrv_unapply_subtree_drain(child, bs);
|
||||
-
|
||||
assert_bdrv_graph_writable(bs);
|
||||
QLIST_REMOVE(child, next);
|
||||
if (child == bs->backing) {
|
||||
@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
}
|
||||
|
||||
if (old_bs) {
|
||||
- /* Detach first so that the recursive drain sections coming from @child
|
||||
- * are already gone and we only end the drain sections that came from
|
||||
- * elsewhere. */
|
||||
if (child->klass->detach) {
|
||||
child->klass->detach(child);
|
||||
}
|
||||
@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
|
||||
|
||||
/*
|
||||
- * Detaching the old node may have led to the new node's
|
||||
- * quiesce_counter having been decreased. Not a problem, we
|
||||
- * just need to recognize this here and then invoke
|
||||
- * drained_end appropriately more often.
|
||||
+ * Polling in bdrv_parent_drained_begin_single() may have led to the new
|
||||
+ * node's quiesce_counter having been decreased. Not a problem, we just
|
||||
+ * need to recognize this here and then invoke drained_end appropriately
|
||||
+ * more often.
|
||||
*/
|
||||
assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
|
||||
drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
|
||||
|
||||
- /* Attach only after starting new drained sections, so that recursive
|
||||
- * drain sections coming from @child don't get an extra .drained_begin
|
||||
- * callback. */
|
||||
if (child->klass->attach) {
|
||||
child->klass->attach(child);
|
||||
}
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index a25103be6f..75224480d0 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -236,17 +236,15 @@ typedef struct {
|
||||
BlockDriverState *bs;
|
||||
bool done;
|
||||
bool begin;
|
||||
- bool recursive;
|
||||
bool poll;
|
||||
BdrvChild *parent;
|
||||
bool ignore_bds_parents;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *ignore_parent, bool ignore_bds_parents)
|
||||
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
- BdrvChild *child, *next;
|
||||
IO_OR_GS_CODE();
|
||||
|
||||
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
|
||||
@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
return true;
|
||||
}
|
||||
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
|
||||
- if (bdrv_drain_poll(child->bs, recursive, child, false)) {
|
||||
- return true;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-
|
||||
return false;
|
||||
}
|
||||
|
||||
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
|
||||
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
|
||||
BdrvChild *ignore_parent)
|
||||
{
|
||||
- return bdrv_drain_poll(bs, recursive, ignore_parent, false);
|
||||
+ return bdrv_drain_poll(bs, ignore_parent, false);
|
||||
}
|
||||
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- bool poll);
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents);
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents, bool poll);
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents);
|
||||
|
||||
static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
{
|
||||
@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
aio_context_acquire(ctx);
|
||||
bdrv_dec_in_flight(bs);
|
||||
if (data->begin) {
|
||||
- bdrv_do_drained_begin(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents, data->poll);
|
||||
+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
|
||||
+ data->poll);
|
||||
} else {
|
||||
assert(!data->poll);
|
||||
- bdrv_do_drained_end(bs, data->recursive, data->parent,
|
||||
- data->ignore_bds_parents);
|
||||
+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
} else {
|
||||
@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
|
||||
}
|
||||
|
||||
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
- bool begin, bool recursive,
|
||||
+ bool begin,
|
||||
BdrvChild *parent,
|
||||
bool ignore_bds_parents,
|
||||
bool poll)
|
||||
@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
|
||||
.bs = bs,
|
||||
.done = false,
|
||||
.begin = begin,
|
||||
- .recursive = recursive,
|
||||
.parent = parent,
|
||||
.ignore_bds_parents = ignore_bds_parents,
|
||||
.poll = poll,
|
||||
@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents,
|
||||
- bool poll)
|
||||
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents, bool poll)
|
||||
{
|
||||
- BdrvChild *child, *next;
|
||||
-
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
|
||||
- poll);
|
||||
+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
|
||||
return;
|
||||
}
|
||||
|
||||
bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
|
||||
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- bs->recursive_quiesce_counter++;
|
||||
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
|
||||
- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
|
||||
- false);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
/*
|
||||
* Wait for drained requests to finish.
|
||||
*
|
||||
@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
|
||||
*/
|
||||
if (poll) {
|
||||
assert(!ignore_bds_parents);
|
||||
- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
|
||||
+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
|
||||
}
|
||||
}
|
||||
|
||||
void bdrv_drained_begin(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, false, NULL, false, true);
|
||||
-}
|
||||
-
|
||||
-void bdrv_subtree_drained_begin(BlockDriverState *bs)
|
||||
-{
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_begin(bs, true, NULL, false, true);
|
||||
+ bdrv_do_drained_begin(bs, NULL, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function does not poll, nor must any of its recursively called
|
||||
* functions.
|
||||
*/
|
||||
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *parent, bool ignore_bds_parents)
|
||||
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
|
||||
+ bool ignore_bds_parents)
|
||||
{
|
||||
- BdrvChild *child;
|
||||
int old_quiesce_counter;
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
|
||||
- false);
|
||||
+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
|
||||
return;
|
||||
}
|
||||
assert(bs->quiesce_counter > 0);
|
||||
@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
|
||||
if (old_quiesce_counter == 1) {
|
||||
aio_enable_external(bdrv_get_aio_context(bs));
|
||||
}
|
||||
-
|
||||
- if (recursive) {
|
||||
- assert(!ignore_bds_parents);
|
||||
- bs->recursive_quiesce_counter--;
|
||||
- QLIST_FOREACH(child, &bs->children, next) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
|
||||
- }
|
||||
- }
|
||||
}
|
||||
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, false, NULL, false);
|
||||
-}
|
||||
-
|
||||
-void bdrv_subtree_drained_end(BlockDriverState *bs)
|
||||
-{
|
||||
- IO_OR_GS_CODE();
|
||||
- bdrv_do_drained_end(bs, true, NULL, false);
|
||||
-}
|
||||
-
|
||||
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
|
||||
-{
|
||||
- int i;
|
||||
- IO_OR_GS_CODE();
|
||||
-
|
||||
- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_begin(child->bs, true, child, false, true);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
|
||||
-{
|
||||
- int i;
|
||||
- IO_OR_GS_CODE();
|
||||
-
|
||||
- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
|
||||
- bdrv_do_drained_end(child->bs, true, child, false);
|
||||
- }
|
||||
+ bdrv_do_drained_end(bs, NULL, false);
|
||||
}
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs)
|
||||
@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void)
|
||||
while ((bs = bdrv_next_all_states(bs))) {
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(aio_context);
|
||||
- result |= bdrv_drain_poll(bs, false, NULL, true);
|
||||
+ result |= bdrv_drain_poll(bs, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void)
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (qemu_in_coroutine()) {
|
||||
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
|
||||
+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_begin(bs, false, NULL, true, false);
|
||||
+ bdrv_do_drained_begin(bs, NULL, true, false);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
|
||||
g_assert(!bs->refcnt);
|
||||
|
||||
while (bs->quiesce_counter) {
|
||||
- bdrv_do_drained_end(bs, false, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void)
|
||||
AioContext *aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(aio_context);
|
||||
- bdrv_do_drained_end(bs, false, NULL, true);
|
||||
+ bdrv_do_drained_end(bs, NULL, true);
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 054e964c9b..9c36a16a1f 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
/**
|
||||
* bdrv_drain_poll:
|
||||
*
|
||||
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
|
||||
- * and if @recursive is true its children as well (used for subtree drain).
|
||||
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
|
||||
*
|
||||
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
|
||||
* ignore the drain request because they will be drained separately (used for
|
||||
@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
|
||||
*
|
||||
* This is part of bdrv_drained_begin.
|
||||
*/
|
||||
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
|
||||
- BdrvChild *ignore_parent, bool ignore_bds_parents);
|
||||
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
|
||||
+ bool ignore_bds_parents);
|
||||
|
||||
/**
|
||||
* bdrv_drained_begin:
|
||||
@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
|
||||
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
|
||||
BdrvChild *parent, bool ignore_bds_parents);
|
||||
|
||||
-/**
|
||||
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
|
||||
- * exclusive access to all child nodes as well.
|
||||
- */
|
||||
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
-
|
||||
/**
|
||||
* bdrv_drained_end:
|
||||
*
|
||||
@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
||||
*/
|
||||
void bdrv_drained_end(BlockDriverState *bs);
|
||||
|
||||
-/**
|
||||
- * End a quiescent section started by bdrv_subtree_drained_begin().
|
||||
- */
|
||||
-void bdrv_subtree_drained_end(BlockDriverState *bs);
|
||||
-
|
||||
#endif /* BLOCK_IO_H */
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 2b97576f6d..791dddfd7d 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -1184,7 +1184,6 @@ struct BlockDriverState {
|
||||
|
||||
/* Accessed with atomic ops. */
|
||||
int quiesce_counter;
|
||||
- int recursive_quiesce_counter;
|
||||
|
||||
unsigned int write_gen; /* Current data generation */
|
||||
|
||||
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
|
||||
index 4b0b3e17ef..8bc061ebb8 100644
|
||||
--- a/include/block/block_int-io.h
|
||||
+++ b/include/block/block_int-io.h
|
||||
@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
|
||||
*/
|
||||
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
|
||||
|
||||
-
|
||||
-/*
|
||||
- * "I/O or GS" API functions. These functions can run without
|
||||
- * the BQL, but only in one specific iothread/main loop.
|
||||
- *
|
||||
- * See include/block/block-io.h for more information about
|
||||
- * the "I/O or GS" API.
|
||||
- */
|
||||
-
|
||||
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
|
||||
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
|
||||
-
|
||||
#endif /* BLOCK_INT_IO_H */
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 695519ee02..dda08de8db 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
|
||||
enum drain_type {
|
||||
BDRV_DRAIN_ALL,
|
||||
BDRV_DRAIN,
|
||||
- BDRV_SUBTREE_DRAIN,
|
||||
DRAIN_TYPE_MAX,
|
||||
};
|
||||
|
||||
@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
|
||||
switch (drain_type) {
|
||||
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
|
||||
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
|
||||
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
|
||||
default: g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
|
||||
switch (drain_type) {
|
||||
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
|
||||
case BDRV_DRAIN: bdrv_drained_end(bs); break;
|
||||
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
|
||||
default: g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void)
|
||||
test_drv_cb_common(BDRV_DRAIN, false);
|
||||
}
|
||||
|
||||
-static void test_drv_cb_drain_subtree(void)
|
||||
-{
|
||||
- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
|
||||
-}
|
||||
-
|
||||
static void test_drv_cb_co_drain_all(void)
|
||||
{
|
||||
call_in_coroutine(test_drv_cb_drain_all);
|
||||
@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void)
|
||||
call_in_coroutine(test_drv_cb_drain);
|
||||
}
|
||||
|
||||
-static void test_drv_cb_co_drain_subtree(void)
|
||||
-{
|
||||
- call_in_coroutine(test_drv_cb_drain_subtree);
|
||||
-}
|
||||
-
|
||||
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
|
||||
{
|
||||
BlockBackend *blk;
|
||||
@@ -332,11 +319,6 @@ static void test_quiesce_drain(void)
|
||||
test_quiesce_common(BDRV_DRAIN, false);
|
||||
}
|
||||
|
||||
-static void test_quiesce_drain_subtree(void)
|
||||
-{
|
||||
- test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
|
||||
-}
|
||||
-
|
||||
static void test_quiesce_co_drain_all(void)
|
||||
{
|
||||
call_in_coroutine(test_quiesce_drain_all);
|
||||
@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void)
|
||||
call_in_coroutine(test_quiesce_drain);
|
||||
}
|
||||
|
||||
-static void test_quiesce_co_drain_subtree(void)
|
||||
-{
|
||||
- call_in_coroutine(test_quiesce_drain_subtree);
|
||||
-}
|
||||
-
|
||||
static void test_nested(void)
|
||||
{
|
||||
BlockBackend *blk;
|
||||
@@ -402,158 +379,6 @@ static void test_nested(void)
|
||||
blk_unref(blk);
|
||||
}
|
||||
|
||||
-static void test_multiparent(void)
|
||||
-{
|
||||
- BlockBackend *blk_a, *blk_b;
|
||||
- BlockDriverState *bs_a, *bs_b, *backing;
|
||||
- BDRVTestState *a_s, *b_s, *backing_s;
|
||||
-
|
||||
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- a_s = bs_a->opaque;
|
||||
- blk_insert_bs(blk_a, bs_a, &error_abort);
|
||||
-
|
||||
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- b_s = bs_b->opaque;
|
||||
- blk_insert_bs(blk_b, bs_b, &error_abort);
|
||||
-
|
||||
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
|
||||
- backing_s = backing->opaque;
|
||||
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 1);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 2);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 1);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 1);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- bdrv_unref(backing);
|
||||
- bdrv_unref(bs_a);
|
||||
- bdrv_unref(bs_b);
|
||||
- blk_unref(blk_a);
|
||||
- blk_unref(blk_b);
|
||||
-}
|
||||
-
|
||||
-static void test_graph_change_drain_subtree(void)
|
||||
-{
|
||||
- BlockBackend *blk_a, *blk_b;
|
||||
- BlockDriverState *bs_a, *bs_b, *backing;
|
||||
- BDRVTestState *a_s, *b_s, *backing_s;
|
||||
-
|
||||
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- a_s = bs_a->opaque;
|
||||
- blk_insert_bs(blk_a, bs_a, &error_abort);
|
||||
-
|
||||
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
|
||||
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
|
||||
- &error_abort);
|
||||
- b_s = bs_b->opaque;
|
||||
- blk_insert_bs(blk_b, bs_b, &error_abort);
|
||||
-
|
||||
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
|
||||
- backing_s = backing->opaque;
|
||||
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 5);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, NULL, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 3);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 3);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 2);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 3);
|
||||
-
|
||||
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 5);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 5);
|
||||
-
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
|
||||
-
|
||||
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
|
||||
- g_assert_cmpint(a_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(b_s->drain_count, ==, 0);
|
||||
- g_assert_cmpint(backing_s->drain_count, ==, 0);
|
||||
-
|
||||
- bdrv_unref(backing);
|
||||
- bdrv_unref(bs_a);
|
||||
- bdrv_unref(bs_b);
|
||||
- blk_unref(blk_a);
|
||||
- blk_unref(blk_b);
|
||||
-}
|
||||
-
|
||||
static void test_graph_change_drain_all(void)
|
||||
{
|
||||
BlockBackend *blk_a, *blk_b;
|
||||
@@ -773,12 +598,6 @@ static void test_iothread_drain(void)
|
||||
test_iothread_common(BDRV_DRAIN, 1);
|
||||
}
|
||||
|
||||
-static void test_iothread_drain_subtree(void)
|
||||
-{
|
||||
- test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
|
||||
- test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
|
||||
-}
|
||||
-
|
||||
|
||||
typedef struct TestBlockJob {
|
||||
BlockJob common;
|
||||
@@ -863,7 +682,6 @@ enum test_job_result {
|
||||
enum test_job_drain_node {
|
||||
TEST_JOB_DRAIN_SRC,
|
||||
TEST_JOB_DRAIN_SRC_CHILD,
|
||||
- TEST_JOB_DRAIN_SRC_PARENT,
|
||||
};
|
||||
|
||||
static void test_blockjob_common_drain_node(enum drain_type drain_type,
|
||||
@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
|
||||
case TEST_JOB_DRAIN_SRC_CHILD:
|
||||
drain_bs = src_backing;
|
||||
break;
|
||||
- case TEST_JOB_DRAIN_SRC_PARENT:
|
||||
- drain_bs = src_overlay;
|
||||
- break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
|
||||
TEST_JOB_DRAIN_SRC);
|
||||
test_blockjob_common_drain_node(drain_type, use_iothread, result,
|
||||
TEST_JOB_DRAIN_SRC_CHILD);
|
||||
- if (drain_type == BDRV_SUBTREE_DRAIN) {
|
||||
- test_blockjob_common_drain_node(drain_type, use_iothread, result,
|
||||
- TEST_JOB_DRAIN_SRC_PARENT);
|
||||
- }
|
||||
}
|
||||
|
||||
static void test_blockjob_drain_all(void)
|
||||
@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
|
||||
}
|
||||
|
||||
-static void test_blockjob_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_error_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
|
||||
@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
|
||||
}
|
||||
|
||||
-static void test_blockjob_error_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_iothread_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
|
||||
@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
|
||||
}
|
||||
|
||||
-static void test_blockjob_iothread_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
|
||||
-}
|
||||
-
|
||||
static void test_blockjob_iothread_error_drain_all(void)
|
||||
{
|
||||
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
|
||||
@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void)
|
||||
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
|
||||
}
|
||||
|
||||
-static void test_blockjob_iothread_error_drain_subtree(void)
|
||||
-{
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
|
||||
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
|
||||
-}
|
||||
-
|
||||
|
||||
typedef struct BDRVTestTopState {
|
||||
BdrvChild *wait_child;
|
||||
@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
|
||||
bdrv_drain(child_bs);
|
||||
bdrv_unref(child_bs);
|
||||
break;
|
||||
- case BDRV_SUBTREE_DRAIN:
|
||||
- /* Would have to ref/unref bs here for !detach_instead_of_delete, but
|
||||
- * then the whole test becomes pointless because the graph changes
|
||||
- * don't occur during the drain any more. */
|
||||
- assert(detach_instead_of_delete);
|
||||
- bdrv_subtree_drained_begin(bs);
|
||||
- bdrv_subtree_drained_end(bs);
|
||||
- break;
|
||||
case BDRV_DRAIN_ALL:
|
||||
bdrv_drain_all_begin();
|
||||
bdrv_drain_all_end();
|
||||
@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void)
|
||||
do_test_delete_by_drain(true, BDRV_DRAIN);
|
||||
}
|
||||
|
||||
-static void test_detach_by_drain_subtree(void)
|
||||
-{
|
||||
- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
|
||||
-}
|
||||
-
|
||||
|
||||
struct detach_by_parent_data {
|
||||
BlockDriverState *parent_b;
|
||||
@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb)
|
||||
g_assert(acb != NULL);
|
||||
|
||||
/* Drain and check the expected result */
|
||||
- bdrv_subtree_drained_begin(parent_b);
|
||||
+ bdrv_drained_begin(parent_b);
|
||||
+ bdrv_drained_begin(a);
|
||||
+ bdrv_drained_begin(b);
|
||||
+ bdrv_drained_begin(c);
|
||||
|
||||
g_assert(detach_by_parent_data.child_c != NULL);
|
||||
|
||||
@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb)
|
||||
g_assert(QLIST_NEXT(child_a, next) == NULL);
|
||||
|
||||
g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
|
||||
+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
|
||||
g_assert_cmpint(a->quiesce_counter, ==, 1);
|
||||
- g_assert_cmpint(b->quiesce_counter, ==, 0);
|
||||
+ g_assert_cmpint(b->quiesce_counter, ==, 1);
|
||||
g_assert_cmpint(c->quiesce_counter, ==, 1);
|
||||
|
||||
- bdrv_subtree_drained_end(parent_b);
|
||||
+ bdrv_drained_end(parent_b);
|
||||
+ bdrv_drained_end(a);
|
||||
+ bdrv_drained_end(b);
|
||||
+ bdrv_drained_end(c);
|
||||
|
||||
bdrv_unref(parent_b);
|
||||
blk_unref(blk);
|
||||
@@ -2202,70 +1984,47 @@ int main(int argc, char **argv)
|
||||
|
||||
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
|
||||
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
|
||||
- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
|
||||
- test_drv_cb_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
|
||||
test_drv_cb_co_drain_all);
|
||||
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
|
||||
- g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
|
||||
- test_drv_cb_co_drain_subtree);
|
||||
-
|
||||
|
||||
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
|
||||
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
|
||||
- g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
|
||||
- test_quiesce_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
|
||||
test_quiesce_co_drain_all);
|
||||
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
|
||||
- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
|
||||
- test_quiesce_co_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/nested", test_nested);
|
||||
- g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
|
||||
|
||||
- g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
|
||||
- test_graph_change_drain_subtree);
|
||||
g_test_add_func("/bdrv-drain/graph-change/drain_all",
|
||||
test_graph_change_drain_all);
|
||||
|
||||
g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
|
||||
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
|
||||
- g_test_add_func("/bdrv-drain/iothread/drain_subtree",
|
||||
- test_iothread_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
|
||||
- test_blockjob_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
|
||||
test_blockjob_error_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/error/drain",
|
||||
test_blockjob_error_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
|
||||
- test_blockjob_error_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
|
||||
test_blockjob_iothread_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
|
||||
test_blockjob_iothread_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
|
||||
- test_blockjob_iothread_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
|
||||
test_blockjob_iothread_error_drain_all);
|
||||
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
|
||||
test_blockjob_iothread_error_drain);
|
||||
- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
|
||||
- test_blockjob_iothread_error_drain_subtree);
|
||||
|
||||
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
|
||||
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
|
||||
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
|
||||
- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
|
||||
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
|
||||
g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,302 @@
|
||||
From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 18 Nov 2022 18:40:58 +0100
|
||||
Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to
|
||||
non-coroutine_fn
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Polling during bdrv_drained_end() can be problematic (and in the future,
|
||||
we may get cases for bdrv_drained_begin() where polling is forbidden,
|
||||
and we don't care about already in-flight requests, but just want to
|
||||
prevent new requests from arriving).
|
||||
|
||||
The .bdrv_drained_begin/end callbacks running in a coroutine is the only
|
||||
reason why we have to do this polling, so make them non-coroutine
|
||||
callbacks again. None of the callers actually yield any more.
|
||||
|
||||
This means that bdrv_drained_end() effectively doesn't poll any more,
|
||||
even if AIO_WAIT_WHILE() loops are still there (their condition is false
|
||||
from the beginning). This is generally not a problem, but in
|
||||
test-bdrv-drain, some additional explicit aio_poll() calls need to be
|
||||
added because the test case wants to verify the final state after BHs
|
||||
have executed.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221118174110.55183-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 4 +--
|
||||
block/io.c | 49 +++++---------------------------
|
||||
block/qed.c | 6 ++--
|
||||
block/throttle.c | 8 +++---
|
||||
include/block/block_int-common.h | 10 ++++---
|
||||
tests/unit/test-bdrv-drain.c | 18 ++++++------
|
||||
6 files changed, 32 insertions(+), 63 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index ec184150a2..16a62a329c 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
|
||||
assert(is_power_of_2(bs->bl.request_alignment));
|
||||
|
||||
for (i = 0; i < bs->quiesce_counter; i++) {
|
||||
- if (drv->bdrv_co_drain_begin) {
|
||||
- drv->bdrv_co_drain_begin(bs);
|
||||
+ if (drv->bdrv_drain_begin) {
|
||||
+ drv->bdrv_drain_begin(bs);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index b9424024f9..c2ed4b2af9 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -252,55 +252,20 @@ typedef struct {
|
||||
int *drained_end_counter;
|
||||
} BdrvCoDrainData;
|
||||
|
||||
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
|
||||
-{
|
||||
- BdrvCoDrainData *data = opaque;
|
||||
- BlockDriverState *bs = data->bs;
|
||||
-
|
||||
- if (data->begin) {
|
||||
- bs->drv->bdrv_co_drain_begin(bs);
|
||||
- } else {
|
||||
- bs->drv->bdrv_co_drain_end(bs);
|
||||
- }
|
||||
-
|
||||
- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
|
||||
- qatomic_mb_set(&data->done, true);
|
||||
- if (!data->begin) {
|
||||
- qatomic_dec(data->drained_end_counter);
|
||||
- }
|
||||
- bdrv_dec_in_flight(bs);
|
||||
-
|
||||
- g_free(data);
|
||||
-}
|
||||
-
|
||||
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
|
||||
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
|
||||
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
|
||||
int *drained_end_counter)
|
||||
{
|
||||
- BdrvCoDrainData *data;
|
||||
-
|
||||
- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
|
||||
- (!begin && !bs->drv->bdrv_co_drain_end)) {
|
||||
+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
|
||||
+ (!begin && !bs->drv->bdrv_drain_end)) {
|
||||
return;
|
||||
}
|
||||
|
||||
- data = g_new(BdrvCoDrainData, 1);
|
||||
- *data = (BdrvCoDrainData) {
|
||||
- .bs = bs,
|
||||
- .done = false,
|
||||
- .begin = begin,
|
||||
- .drained_end_counter = drained_end_counter,
|
||||
- };
|
||||
-
|
||||
- if (!begin) {
|
||||
- qatomic_inc(drained_end_counter);
|
||||
+ if (begin) {
|
||||
+ bs->drv->bdrv_drain_begin(bs);
|
||||
+ } else {
|
||||
+ bs->drv->bdrv_drain_end(bs);
|
||||
}
|
||||
-
|
||||
- /* Make sure the driver callback completes during the polling phase for
|
||||
- * drain_begin. */
|
||||
- bdrv_inc_in_flight(bs);
|
||||
- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
|
||||
- aio_co_schedule(bdrv_get_aio_context(bs), data->co);
|
||||
}
|
||||
|
||||
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 013f826c44..c2691a85b1 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
|
||||
assert(!s->allocating_write_reqs_plugged);
|
||||
if (s->allocating_acb != NULL) {
|
||||
/* Another allocating write came concurrently. This cannot happen
|
||||
- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
|
||||
+ * from bdrv_qed_drain_begin, but it can happen when the timer runs.
|
||||
*/
|
||||
qemu_co_mutex_unlock(&s->table_lock);
|
||||
return false;
|
||||
@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVQEDState *s = bs->opaque;
|
||||
|
||||
@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = {
|
||||
.bdrv_co_check = bdrv_qed_co_check,
|
||||
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
|
||||
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
|
||||
- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
|
||||
+ .bdrv_drain_begin = bdrv_qed_drain_begin,
|
||||
};
|
||||
|
||||
static void bdrv_qed_init(void)
|
||||
diff --git a/block/throttle.c b/block/throttle.c
|
||||
index 131eba3ab4..88851c84f4 100644
|
||||
--- a/block/throttle.c
|
||||
+++ b/block/throttle.c
|
||||
@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
|
||||
reopen_state->opaque = NULL;
|
||||
}
|
||||
|
||||
-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
|
||||
+static void throttle_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
ThrottleGroupMember *tgm = bs->opaque;
|
||||
if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
|
||||
@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
|
||||
+static void throttle_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
ThrottleGroupMember *tgm = bs->opaque;
|
||||
assert(tgm->io_limits_disabled);
|
||||
@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
|
||||
.bdrv_reopen_commit = throttle_reopen_commit,
|
||||
.bdrv_reopen_abort = throttle_reopen_abort,
|
||||
|
||||
- .bdrv_co_drain_begin = throttle_co_drain_begin,
|
||||
- .bdrv_co_drain_end = throttle_co_drain_end,
|
||||
+ .bdrv_drain_begin = throttle_drain_begin,
|
||||
+ .bdrv_drain_end = throttle_drain_end,
|
||||
|
||||
.is_filter = true,
|
||||
.strong_runtime_opts = throttle_strong_runtime_opts,
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 31ae91e56e..40d646d1ed 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -735,17 +735,19 @@ struct BlockDriver {
|
||||
void (*bdrv_io_unplug)(BlockDriverState *bs);
|
||||
|
||||
/**
|
||||
- * bdrv_co_drain_begin is called if implemented in the beginning of a
|
||||
+ * bdrv_drain_begin is called if implemented in the beginning of a
|
||||
* drain operation to drain and stop any internal sources of requests in
|
||||
* the driver.
|
||||
- * bdrv_co_drain_end is called if implemented at the end of the drain.
|
||||
+ * bdrv_drain_end is called if implemented at the end of the drain.
|
||||
*
|
||||
* They should be used by the driver to e.g. manage scheduled I/O
|
||||
* requests, or toggle an internal state. After the end of the drain new
|
||||
* requests will continue normally.
|
||||
+ *
|
||||
+ * Implementations of both functions must not call aio_poll().
|
||||
*/
|
||||
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
|
||||
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
|
||||
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
|
||||
+ void (*bdrv_drain_end)(BlockDriverState *bs);
|
||||
|
||||
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
|
||||
bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
|
||||
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
|
||||
index 24f34e24ad..695519ee02 100644
|
||||
--- a/tests/unit/test-bdrv-drain.c
|
||||
+++ b/tests/unit/test-bdrv-drain.c
|
||||
@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque)
|
||||
bdrv_dec_in_flight(bs);
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVTestState *s = bs->opaque;
|
||||
s->drain_count++;
|
||||
@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
|
||||
}
|
||||
}
|
||||
|
||||
-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
|
||||
+static void bdrv_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVTestState *s = bs->opaque;
|
||||
s->drain_count--;
|
||||
@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = {
|
||||
.bdrv_close = bdrv_test_close,
|
||||
.bdrv_co_preadv = bdrv_test_co_preadv,
|
||||
|
||||
- .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
|
||||
- .bdrv_co_drain_end = bdrv_test_co_drain_end,
|
||||
+ .bdrv_drain_begin = bdrv_test_drain_begin,
|
||||
+ .bdrv_drain_end = bdrv_test_drain_end,
|
||||
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
|
||||
@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void)
|
||||
bdrv_drained_begin(bs_child);
|
||||
g_assert(!job_has_completed);
|
||||
bdrv_drained_end(bs_child);
|
||||
+ aio_poll(qemu_get_aio_context(), false);
|
||||
g_assert(job_has_completed);
|
||||
|
||||
bdrv_unref(bs_parents[0]);
|
||||
@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void)
|
||||
|
||||
g_assert(!job_has_completed);
|
||||
ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
|
||||
+ aio_poll(qemu_get_aio_context(), false);
|
||||
g_assert(ret == 0);
|
||||
g_assert(job_has_completed);
|
||||
|
||||
@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
|
||||
* .was_drained.
|
||||
* Increment .drain_count.
|
||||
*/
|
||||
-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
|
||||
+static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
|
||||
* If .drain_count reaches 0 and the node has a backing file, issue a
|
||||
* read request.
|
||||
*/
|
||||
-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
|
||||
+static void bdrv_replace_test_drain_end(BlockDriverState *bs)
|
||||
{
|
||||
BDRVReplaceTestState *s = bs->opaque;
|
||||
|
||||
@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = {
|
||||
.bdrv_close = bdrv_replace_test_close,
|
||||
.bdrv_co_preadv = bdrv_replace_test_co_preadv,
|
||||
|
||||
- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
|
||||
- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
|
||||
+ .bdrv_drain_begin = bdrv_replace_test_drain_begin,
|
||||
+ .bdrv_drain_end = bdrv_replace_test_drain_end,
|
||||
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
};
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,246 @@
|
||||
From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:56 +0200
|
||||
Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
ImageInfo sometimes contains flat information, and sometimes it does
|
||||
not. Split off a BlockNodeInfo struct, which only contains information
|
||||
about a single node and has no link to the backing image.
|
||||
|
||||
We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct,
|
||||
which has links to all child nodes, not just the backing node. It would
|
||||
be strange to base BlockGraphInfo on ImageInfo, because then this
|
||||
extended struct would have two links to the backing node (one in
|
||||
BlockGraphInfo as one of all the child links, and one in ImageInfo).
|
||||
|
||||
Furthermore, it is quite common to ignore the backing-image field
|
||||
altogether: bdrv_query_image_info() does not set it, and
|
||||
bdrv_image_info_dump() does not evaluate it. That signals that we
|
||||
should have different structs for describing a single node and one that
|
||||
has a link to the backing image.
|
||||
|
||||
Still, bdrv_query_image_info() and bdrv_image_info_dump() are not
|
||||
changed too much in this patch. Follow-up patches will handle them.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-5-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------
|
||||
include/block/qapi.h | 3 ++
|
||||
qapi/block-core.json | 24 +++++++++----
|
||||
3 files changed, 85 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index 51202b470a..e5022b4481 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
/**
|
||||
- * bdrv_query_image_info:
|
||||
- * @bs: block device to examine
|
||||
- * @p_info: location to store image information
|
||||
- * @errp: location to store error information
|
||||
- *
|
||||
- * Store "flat" image information in @p_info.
|
||||
- *
|
||||
- * "Flat" means it does *not* query backing image information,
|
||||
- * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
- * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
- * a backing image.
|
||||
- *
|
||||
- * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ * Helper function for other query info functions. Store information about @bs
|
||||
+ * in @info, setting @errp on error.
|
||||
*/
|
||||
-void bdrv_query_image_info(BlockDriverState *bs,
|
||||
- ImageInfo **p_info,
|
||||
- Error **errp)
|
||||
+static void bdrv_do_query_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo *info,
|
||||
+ Error **errp)
|
||||
{
|
||||
int64_t size;
|
||||
const char *backing_filename;
|
||||
BlockDriverInfo bdi;
|
||||
int ret;
|
||||
Error *err = NULL;
|
||||
- ImageInfo *info;
|
||||
|
||||
aio_context_acquire(bdrv_get_aio_context(bs));
|
||||
|
||||
@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
|
||||
bdrv_refresh_filename(bs);
|
||||
|
||||
- info = g_new0(ImageInfo, 1);
|
||||
info->filename = g_strdup(bs->filename);
|
||||
info->format = g_strdup(bdrv_get_format_name(bs));
|
||||
info->virtual_size = size;
|
||||
@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
info->format_specific = bdrv_get_specific_info(bs, &err);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
- qapi_free_ImageInfo(info);
|
||||
goto out;
|
||||
}
|
||||
info->has_format_specific = info->format_specific != NULL;
|
||||
@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
break;
|
||||
default:
|
||||
error_propagate(errp, err);
|
||||
- qapi_free_ImageInfo(info);
|
||||
goto out;
|
||||
}
|
||||
|
||||
- *p_info = info;
|
||||
-
|
||||
out:
|
||||
aio_context_release(bdrv_get_aio_context(bs));
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * bdrv_query_block_node_info:
|
||||
+ * @bs: block node to examine
|
||||
+ * @p_info: location to store node information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store image information about @bs in @p_info.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockNodeInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(BlockNodeInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, info, errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_BlockNodeInfo(info);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * bdrv_query_image_info:
|
||||
+ * @bs: block node to examine
|
||||
+ * @p_info: location to store image information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store "flat" image information in @p_info.
|
||||
+ *
|
||||
+ * "Flat" means it does *not* query backing image information,
|
||||
+ * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
+ * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
+ * a backing image.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_image_info(BlockDriverState *bs,
|
||||
+ ImageInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ImageInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(ImageInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_ImageInfo(info);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+}
|
||||
+
|
||||
/* @p_info will be set only on success. */
|
||||
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
|
||||
Error **errp)
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index c09859ea78..c7de4e3fa9 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
|
||||
SnapshotInfoList **p_list,
|
||||
Error **errp);
|
||||
+void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
+ BlockNodeInfo **p_info,
|
||||
+ Error **errp);
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
Error **errp);
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 4b9365167f..7720da0498 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -251,7 +251,7 @@
|
||||
} }
|
||||
|
||||
##
|
||||
-# @ImageInfo:
|
||||
+# @BlockNodeInfo:
|
||||
#
|
||||
# Information about a QEMU image file
|
||||
#
|
||||
@@ -279,22 +279,34 @@
|
||||
#
|
||||
# @snapshots: list of VM snapshots
|
||||
#
|
||||
-# @backing-image: info of the backing image (since 1.6)
|
||||
-#
|
||||
# @format-specific: structure supplying additional format-specific
|
||||
# information (since 1.7)
|
||||
#
|
||||
-# Since: 1.3
|
||||
+# Since: 8.0
|
||||
##
|
||||
-{ 'struct': 'ImageInfo',
|
||||
+{ 'struct': 'BlockNodeInfo',
|
||||
'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
|
||||
'*actual-size': 'int', 'virtual-size': 'int',
|
||||
'*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
|
||||
'*backing-filename': 'str', '*full-backing-filename': 'str',
|
||||
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
|
||||
- '*backing-image': 'ImageInfo',
|
||||
'*format-specific': 'ImageInfoSpecific' } }
|
||||
|
||||
+##
|
||||
+# @ImageInfo:
|
||||
+#
|
||||
+# Information about a QEMU image file, and potentially its backing image
|
||||
+#
|
||||
+# @backing-image: info of the backing image
|
||||
+#
|
||||
+# Since: 1.3
|
||||
+##
|
||||
+{ 'struct': 'ImageInfo',
|
||||
+ 'base': 'BlockNodeInfo',
|
||||
+ 'data': {
|
||||
+ '*backing-image': 'ImageInfo'
|
||||
+ } }
|
||||
+
|
||||
##
|
||||
# @ImageCheck:
|
||||
#
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,70 @@
|
||||
From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vladimir.sementsov-ogievskiy@openvz.org>
|
||||
Date: Mon, 7 Nov 2022 19:35:56 +0300
|
||||
Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
|
||||
RH-Bugzilla: 2155112
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Drop this simple wrapper used only in one place. We have too many graph
|
||||
modifying functions even without it.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 15 +--------------
|
||||
1 file changed, 1 insertion(+), 14 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index a18f052374..ec184150a2 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
static void bdrv_replace_child_noperm(BdrvChild *child,
|
||||
BlockDriverState *new_bs);
|
||||
static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
|
||||
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
|
||||
- Transaction *tran);
|
||||
|
||||
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
|
||||
BlockReopenQueue *queue,
|
||||
@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
|
||||
tran_add(tran, &bdrv_remove_child_drv, child);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * A function to remove backing-chain child of @bs if exists: cow child for
|
||||
- * format nodes (always .backing) and filter child for filters (may be .file or
|
||||
- * .backing)
|
||||
- */
|
||||
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
|
||||
- Transaction *tran)
|
||||
-{
|
||||
- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
|
||||
-}
|
||||
-
|
||||
static int bdrv_replace_node_noperm(BlockDriverState *from,
|
||||
BlockDriverState *to,
|
||||
bool auto_skip, Transaction *tran,
|
||||
@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
|
||||
}
|
||||
|
||||
if (detach_subchain) {
|
||||
- bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
|
||||
+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
|
||||
}
|
||||
|
||||
found = g_hash_table_new(NULL, NULL);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,145 @@
|
||||
From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:54 +0200
|
||||
Subject: [PATCH 07/20] block/file: Add file-specific image info
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Add some (optional) information that the file driver can provide for
|
||||
image files, namely the extent size hint.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-3-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 30 ++++++++++++++++++++++++++++++
|
||||
qapi/block-core.json | 26 ++++++++++++++++++++++++--
|
||||
2 files changed, 54 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b9647c5ffc..df3da79aed 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1);
|
||||
+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
|
||||
+
|
||||
+ *spec_info = (ImageInfoSpecific){
|
||||
+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE,
|
||||
+ .u.file.data = file_info,
|
||||
+ };
|
||||
+
|
||||
+#ifdef FS_IOC_FSGETXATTR
|
||||
+ {
|
||||
+ BDRVRawState *s = bs->opaque;
|
||||
+ struct fsxattr attr;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr);
|
||||
+ if (!ret && attr.fsx_extsize != 0) {
|
||||
+ file_info->has_extent_size_hint = true;
|
||||
+ file_info->extent_size_hint = attr.fsx_extsize;
|
||||
+ }
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ return spec_info;
|
||||
+}
|
||||
+
|
||||
static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_getlength = raw_getlength,
|
||||
.bdrv_get_info = raw_get_info,
|
||||
+ .bdrv_get_specific_info = raw_get_specific_info,
|
||||
.bdrv_get_allocated_file_size
|
||||
= raw_get_allocated_file_size,
|
||||
.bdrv_get_specific_stats = raw_get_specific_stats,
|
||||
@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_getlength = raw_getlength,
|
||||
.bdrv_get_info = raw_get_info,
|
||||
+ .bdrv_get_specific_info = raw_get_specific_info,
|
||||
.bdrv_get_allocated_file_size
|
||||
= raw_get_allocated_file_size,
|
||||
.bdrv_get_specific_stats = hdev_get_specific_stats,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 95ac4fa634..f5d822cbd6 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -139,16 +139,29 @@
|
||||
'*encryption-format': 'RbdImageEncryptionFormat'
|
||||
} }
|
||||
|
||||
+##
|
||||
+# @ImageInfoSpecificFile:
|
||||
+#
|
||||
+# @extent-size-hint: Extent size hint (if available)
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'ImageInfoSpecificFile',
|
||||
+ 'data': {
|
||||
+ '*extent-size-hint': 'size'
|
||||
+ } }
|
||||
+
|
||||
##
|
||||
# @ImageInfoSpecificKind:
|
||||
#
|
||||
# @luks: Since 2.7
|
||||
# @rbd: Since 6.1
|
||||
+# @file: Since 8.0
|
||||
#
|
||||
# Since: 1.7
|
||||
##
|
||||
{ 'enum': 'ImageInfoSpecificKind',
|
||||
- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] }
|
||||
+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] }
|
||||
|
||||
##
|
||||
# @ImageInfoSpecificQCow2Wrapper:
|
||||
@@ -185,6 +198,14 @@
|
||||
{ 'struct': 'ImageInfoSpecificRbdWrapper',
|
||||
'data': { 'data': 'ImageInfoSpecificRbd' } }
|
||||
|
||||
+##
|
||||
+# @ImageInfoSpecificFileWrapper:
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'ImageInfoSpecificFileWrapper',
|
||||
+ 'data': { 'data': 'ImageInfoSpecificFile' } }
|
||||
+
|
||||
##
|
||||
# @ImageInfoSpecific:
|
||||
#
|
||||
@@ -199,7 +220,8 @@
|
||||
'qcow2': 'ImageInfoSpecificQCow2Wrapper',
|
||||
'vmdk': 'ImageInfoSpecificVmdkWrapper',
|
||||
'luks': 'ImageInfoSpecificLUKSWrapper',
|
||||
- 'rbd': 'ImageInfoSpecificRbdWrapper'
|
||||
+ 'rbd': 'ImageInfoSpecificRbdWrapper',
|
||||
+ 'file': 'ImageInfoSpecificFileWrapper'
|
||||
} }
|
||||
|
||||
##
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,206 @@
|
||||
From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:27:00 +0200
|
||||
Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump()
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
In order to let qemu-img info present a block graph, add a parameter to
|
||||
bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the
|
||||
information of nodes below the root level can be given an indentation.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-9-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/monitor/block-hmp-cmds.c | 2 +-
|
||||
block/qapi.c | 47 +++++++++++++++++++---------------
|
||||
include/block/qapi.h | 5 ++--
|
||||
qemu-img.c | 2 +-
|
||||
qemu-io-cmds.c | 3 ++-
|
||||
5 files changed, 34 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index aa37faa601..72824d4e2e 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
|
||||
monitor_printf(mon, "\nImages:\n");
|
||||
image_info = inserted->image;
|
||||
while (1) {
|
||||
- bdrv_node_info_dump(qapi_ImageInfo_base(image_info));
|
||||
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0);
|
||||
if (image_info->has_backing_image) {
|
||||
image_info = image_info->backing_image;
|
||||
} else {
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index f208c21ccf..3e35603f0c 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj)
|
||||
* prepending an optional prefix if the dump is not empty.
|
||||
*/
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
- const char *prefix)
|
||||
+ const char *prefix,
|
||||
+ int indentation)
|
||||
{
|
||||
QObject *obj, *data;
|
||||
Visitor *v = qobject_output_visitor_new(&obj);
|
||||
@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
data = qdict_get(qobject_to(QDict, obj), "data");
|
||||
if (!qobject_is_empty_dump(data)) {
|
||||
if (prefix) {
|
||||
- qemu_printf("%s", prefix);
|
||||
+ qemu_printf("%*s%s", indentation * 4, "", prefix);
|
||||
}
|
||||
- dump_qobject(1, data);
|
||||
+ dump_qobject(indentation + 1, data);
|
||||
}
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
}
|
||||
|
||||
-void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation)
|
||||
{
|
||||
char *size_buf, *dsize_buf;
|
||||
+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
|
||||
+
|
||||
if (!info->has_actual_size) {
|
||||
dsize_buf = g_strdup("unavailable");
|
||||
} else {
|
||||
dsize_buf = size_to_str(info->actual_size);
|
||||
}
|
||||
size_buf = size_to_str(info->virtual_size);
|
||||
- qemu_printf("image: %s\n"
|
||||
- "file format: %s\n"
|
||||
- "virtual size: %s (%" PRId64 " bytes)\n"
|
||||
- "disk size: %s\n",
|
||||
- info->filename, info->format, size_buf,
|
||||
- info->virtual_size,
|
||||
- dsize_buf);
|
||||
+ qemu_printf("%simage: %s\n"
|
||||
+ "%sfile format: %s\n"
|
||||
+ "%svirtual size: %s (%" PRId64 " bytes)\n"
|
||||
+ "%sdisk size: %s\n",
|
||||
+ ind_s, info->filename,
|
||||
+ ind_s, info->format,
|
||||
+ ind_s, size_buf, info->virtual_size,
|
||||
+ ind_s, dsize_buf);
|
||||
g_free(size_buf);
|
||||
g_free(dsize_buf);
|
||||
|
||||
if (info->has_encrypted && info->encrypted) {
|
||||
- qemu_printf("encrypted: yes\n");
|
||||
+ qemu_printf("%sencrypted: yes\n", ind_s);
|
||||
}
|
||||
|
||||
if (info->has_cluster_size) {
|
||||
- qemu_printf("cluster_size: %" PRId64 "\n",
|
||||
- info->cluster_size);
|
||||
+ qemu_printf("%scluster_size: %" PRId64 "\n",
|
||||
+ ind_s, info->cluster_size);
|
||||
}
|
||||
|
||||
if (info->has_dirty_flag && info->dirty_flag) {
|
||||
- qemu_printf("cleanly shut down: no\n");
|
||||
+ qemu_printf("%scleanly shut down: no\n", ind_s);
|
||||
}
|
||||
|
||||
if (info->has_backing_filename) {
|
||||
- qemu_printf("backing file: %s", info->backing_filename);
|
||||
+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename);
|
||||
if (!info->has_full_backing_filename) {
|
||||
qemu_printf(" (cannot determine actual path)");
|
||||
} else if (strcmp(info->backing_filename,
|
||||
@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
}
|
||||
qemu_printf("\n");
|
||||
if (info->has_backing_filename_format) {
|
||||
- qemu_printf("backing file format: %s\n",
|
||||
- info->backing_filename_format);
|
||||
+ qemu_printf("%sbacking file format: %s\n",
|
||||
+ ind_s, info->backing_filename_format);
|
||||
}
|
||||
}
|
||||
|
||||
if (info->has_snapshots) {
|
||||
SnapshotInfoList *elem;
|
||||
|
||||
- qemu_printf("Snapshot list:\n");
|
||||
+ qemu_printf("%sSnapshot list:\n", ind_s);
|
||||
+ qemu_printf("%s", ind_s);
|
||||
bdrv_snapshot_dump(NULL);
|
||||
qemu_printf("\n");
|
||||
|
||||
@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
|
||||
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
|
||||
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
|
||||
+ qemu_printf("%s", ind_s);
|
||||
bdrv_snapshot_dump(&sn);
|
||||
qemu_printf("\n");
|
||||
}
|
||||
@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
|
||||
|
||||
if (info->has_format_specific) {
|
||||
bdrv_image_info_specific_dump(info->format_specific,
|
||||
- "Format specific information:\n");
|
||||
+ "Format specific information:\n",
|
||||
+ indentation);
|
||||
}
|
||||
}
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 196436020e..38855f2ae9 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
- const char *prefix);
|
||||
-void bdrv_node_info_dump(BlockNodeInfo *info);
|
||||
+ const char *prefix,
|
||||
+ int indentation);
|
||||
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation);
|
||||
#endif
|
||||
diff --git a/qemu-img.c b/qemu-img.c
|
||||
index 3b2ca3bbcb..30b4ea58bb 100644
|
||||
--- a/qemu-img.c
|
||||
+++ b/qemu-img.c
|
||||
@@ -2859,7 +2859,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list)
|
||||
}
|
||||
delim = true;
|
||||
|
||||
- bdrv_node_info_dump(elem->value);
|
||||
+ bdrv_node_info_dump(elem->value, 0);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
|
||||
index f4a374528e..fdcb89211b 100644
|
||||
--- a/qemu-io-cmds.c
|
||||
+++ b/qemu-io-cmds.c
|
||||
@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
|
||||
}
|
||||
if (spec_info) {
|
||||
bdrv_image_info_specific_dump(spec_info,
|
||||
- "Format specific information:\n");
|
||||
+ "Format specific information:\n",
|
||||
+ 0);
|
||||
qapi_free_ImageInfoSpecific(spec_info);
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,155 @@
|
||||
From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:59 +0200
|
||||
Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Introduce a new QAPI type BlockGraphInfo and an associated
|
||||
bdrv_query_block_graph_info() function that recursively gathers
|
||||
BlockNodeInfo objects through a block graph.
|
||||
|
||||
A follow-up patch is going to make "qemu-img info" use this to print
|
||||
information about all nodes that are (usually implicitly) opened for a
|
||||
given image file.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-8-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/block/qapi.h | 3 +++
|
||||
qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++
|
||||
3 files changed, 86 insertions(+)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index 5d0a8d2ce3..f208c21ccf 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -411,6 +411,54 @@ fail:
|
||||
qapi_free_ImageInfo(info);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * bdrv_query_block_graph_info:
|
||||
+ * @bs: root node to start from
|
||||
+ * @p_info: location to store image information
|
||||
+ * @errp: location to store error information
|
||||
+ *
|
||||
+ * Store image information about the graph starting from @bs in @p_info.
|
||||
+ *
|
||||
+ * @p_info will be set only on success. On error, store error in @errp.
|
||||
+ */
|
||||
+void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
+ BlockGraphInfo **p_info,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockGraphInfo *info;
|
||||
+ BlockChildInfoList **children_list_tail;
|
||||
+ BdrvChild *c;
|
||||
+ ERRP_GUARD();
|
||||
+
|
||||
+ info = g_new0(BlockGraphInfo, 1);
|
||||
+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ children_list_tail = &info->children;
|
||||
+
|
||||
+ QLIST_FOREACH(c, &bs->children, next) {
|
||||
+ BlockChildInfo *c_info;
|
||||
+
|
||||
+ c_info = g_new0(BlockChildInfo, 1);
|
||||
+ QAPI_LIST_APPEND(children_list_tail, c_info);
|
||||
+
|
||||
+ c_info->name = g_strdup(c->name);
|
||||
+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ *p_info = info;
|
||||
+ return;
|
||||
+
|
||||
+fail:
|
||||
+ assert(*errp != NULL);
|
||||
+ qapi_free_BlockGraphInfo(info);
|
||||
+}
|
||||
+
|
||||
/* @p_info will be set only on success. */
|
||||
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
|
||||
Error **errp)
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 2174bf8fa2..196436020e 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
bool flat,
|
||||
bool skip_implicit_filters,
|
||||
Error **errp);
|
||||
+void bdrv_query_block_graph_info(BlockDriverState *bs,
|
||||
+ BlockGraphInfo **p_info,
|
||||
+ Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 4cf2deeb6c..d703e0fb16 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -307,6 +307,41 @@
|
||||
'*backing-image': 'ImageInfo'
|
||||
} }
|
||||
|
||||
+##
|
||||
+# @BlockChildInfo:
|
||||
+#
|
||||
+# Information about all nodes in the block graph starting at some node,
|
||||
+# annotated with information about that node in relation to its parent.
|
||||
+#
|
||||
+# @name: Child name of the root node in the BlockGraphInfo struct, in its role
|
||||
+# as the child of some undescribed parent node
|
||||
+#
|
||||
+# @info: Block graph information starting at this node
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'BlockChildInfo',
|
||||
+ 'data': {
|
||||
+ 'name': 'str',
|
||||
+ 'info': 'BlockGraphInfo'
|
||||
+ } }
|
||||
+
|
||||
+##
|
||||
+# @BlockGraphInfo:
|
||||
+#
|
||||
+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo
|
||||
+# data.
|
||||
+# The base BlockNodeInfo struct contains the information for the (sub)graph's
|
||||
+# root node.
|
||||
+#
|
||||
+# @children: Array of links to this node's child nodes' information
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'BlockGraphInfo',
|
||||
+ 'base': 'BlockNodeInfo',
|
||||
+ 'data': { 'children': ['BlockChildInfo'] } }
|
||||
+
|
||||
##
|
||||
# @ImageCheck:
|
||||
#
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,197 @@
|
||||
From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:58 +0200
|
||||
Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
There is no real reason why bdrv_query_image_info() should generally not
|
||||
recurse. The ImageInfo struct has a pointer to the backing image, so it
|
||||
should generally be filled, unless the caller explicitly opts out.
|
||||
|
||||
This moves the recursing code from bdrv_block_device_info() into
|
||||
bdrv_query_image_info().
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-7-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58)
|
||||
|
||||
Conflicts:
|
||||
block/qapi.c: Conflicts with
|
||||
54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide
|
||||
redundant has_FOO in generated C"), which dropped
|
||||
`has_backing_image`. Without that commit (and 44ea9d9be before it),
|
||||
we still need to set `has_backing_image` in
|
||||
`bdrv_query_image_info()`.
|
||||
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/qapi.c | 94 +++++++++++++++++++++++++++-----------------
|
||||
include/block/qapi.h | 2 +
|
||||
2 files changed, 59 insertions(+), 37 deletions(-)
|
||||
|
||||
diff --git a/block/qapi.c b/block/qapi.c
|
||||
index ad88bf9b38..5d0a8d2ce3 100644
|
||||
--- a/block/qapi.c
|
||||
+++ b/block/qapi.c
|
||||
@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
Error **errp)
|
||||
{
|
||||
ImageInfo **p_image_info;
|
||||
+ ImageInfo *backing_info;
|
||||
BlockDriverState *bs0, *backing;
|
||||
BlockDeviceInfo *info;
|
||||
+ ERRP_GUARD();
|
||||
|
||||
if (!bs->drv) {
|
||||
error_setg(errp, "Block device %s is ejected", bs->node_name);
|
||||
@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
|
||||
bs0 = bs;
|
||||
p_image_info = &info->image;
|
||||
info->backing_file_depth = 0;
|
||||
- while (1) {
|
||||
- Error *local_err = NULL;
|
||||
- bdrv_query_image_info(bs0, p_image_info, &local_err);
|
||||
- if (local_err) {
|
||||
- error_propagate(errp, local_err);
|
||||
- qapi_free_BlockDeviceInfo(info);
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
- /* stop gathering data for flat output */
|
||||
- if (flat) {
|
||||
- break;
|
||||
- }
|
||||
|
||||
- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
|
||||
- /*
|
||||
- * Put any filtered child here (for backwards compatibility to when
|
||||
- * we put bs0->backing here, which might be any filtered child).
|
||||
- */
|
||||
- info->backing_file_depth++;
|
||||
- bs0 = bdrv_filter_or_cow_bs(bs0);
|
||||
- (*p_image_info)->has_backing_image = true;
|
||||
- p_image_info = &((*p_image_info)->backing_image);
|
||||
- } else {
|
||||
- break;
|
||||
- }
|
||||
+ /*
|
||||
+ * Skip automatically inserted nodes that the user isn't aware of for
|
||||
+ * query-block (blk != NULL), but not for query-named-block-nodes
|
||||
+ */
|
||||
+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
|
||||
+ if (*errp) {
|
||||
+ qapi_free_BlockDeviceInfo(info);
|
||||
+ return NULL;
|
||||
+ }
|
||||
|
||||
- /* Skip automatically inserted nodes that the user isn't aware of for
|
||||
- * query-block (blk != NULL), but not for query-named-block-nodes */
|
||||
- if (blk) {
|
||||
- bs0 = bdrv_skip_implicit_filters(bs0);
|
||||
- }
|
||||
+ backing_info = info->image->backing_image;
|
||||
+ while (backing_info) {
|
||||
+ info->backing_file_depth++;
|
||||
+ backing_info = backing_info->backing_image;
|
||||
}
|
||||
|
||||
return info;
|
||||
@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
* bdrv_query_image_info:
|
||||
* @bs: block node to examine
|
||||
* @p_info: location to store image information
|
||||
+ * @flat: skip backing node information
|
||||
+ * @skip_implicit_filters: skip implicit filters in the backing chain
|
||||
* @errp: location to store error information
|
||||
*
|
||||
- * Store "flat" image information in @p_info.
|
||||
+ * Store image information in @p_info, potentially recursively covering the
|
||||
+ * backing chain.
|
||||
*
|
||||
- * "Flat" means it does *not* query backing image information,
|
||||
- * i.e. (*pinfo)->has_backing_image will be set to false and
|
||||
- * (*pinfo)->backing_image to NULL even when the image does in fact have
|
||||
- * a backing image.
|
||||
+ * If @flat is true, do not query backing image information, i.e.
|
||||
+ * (*p_info)->has_backing_image will be set to false and
|
||||
+ * (*p_info)->backing_image to NULL even when the image does in fact have a
|
||||
+ * backing image.
|
||||
+ *
|
||||
+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain
|
||||
+ * will be skipped when querying backing image information.
|
||||
+ * (@skip_implicit_filters is ignored when @flat is true.)
|
||||
*
|
||||
* @p_info will be set only on success. On error, store error in @errp.
|
||||
*/
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
+ bool flat,
|
||||
+ bool skip_implicit_filters,
|
||||
Error **errp)
|
||||
{
|
||||
ImageInfo *info;
|
||||
@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs,
|
||||
info = g_new0(ImageInfo, 1);
|
||||
bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
|
||||
if (*errp) {
|
||||
- qapi_free_ImageInfo(info);
|
||||
- return;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ if (!flat) {
|
||||
+ BlockDriverState *backing;
|
||||
+
|
||||
+ /*
|
||||
+ * Use any filtered child here (for backwards compatibility to when
|
||||
+ * we always took bs->backing, which might be any filtered child).
|
||||
+ */
|
||||
+ backing = bdrv_filter_or_cow_bs(bs);
|
||||
+ if (skip_implicit_filters) {
|
||||
+ backing = bdrv_skip_implicit_filters(backing);
|
||||
+ }
|
||||
+
|
||||
+ if (backing) {
|
||||
+ bdrv_query_image_info(backing, &info->backing_image, false,
|
||||
+ skip_implicit_filters, errp);
|
||||
+ if (*errp) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
+ info->has_backing_image = true;
|
||||
+ }
|
||||
}
|
||||
|
||||
*p_info = info;
|
||||
+ return;
|
||||
+
|
||||
+fail:
|
||||
+ assert(*errp);
|
||||
+ qapi_free_ImageInfo(info);
|
||||
}
|
||||
|
||||
/* @p_info will be set only on success. */
|
||||
diff --git a/include/block/qapi.h b/include/block/qapi.h
|
||||
index 22198dcd0c..2174bf8fa2 100644
|
||||
--- a/include/block/qapi.h
|
||||
+++ b/include/block/qapi.h
|
||||
@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
|
||||
Error **errp);
|
||||
void bdrv_query_image_info(BlockDriverState *bs,
|
||||
ImageInfo **p_info,
|
||||
+ bool flat,
|
||||
+ bool skip_implicit_filters,
|
||||
Error **errp);
|
||||
|
||||
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,99 @@
|
||||
From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 14 Feb 2023 18:16:21 +0100
|
||||
Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in
|
||||
bdrv_append()
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append()
|
||||
RH-Bugzilla: 2168209
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_append() is called with bs_top AioContext held, but
|
||||
bdrv_attach_child_noperm() could change the AioContext of bs_top.
|
||||
|
||||
bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from
|
||||
commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()").
|
||||
bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock
|
||||
is taken, so let's temporarily hold the new AioContext to prevent QEMU
|
||||
from failing in BDRV_POLL_WHILE when it tries to release the wrong
|
||||
AioContext.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209
|
||||
Reported-by: Aihua Liang <aliang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-Id: <20230214171621.11574-1-sgarzare@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 0d78711416..9e1dcb9e47 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
* child.
|
||||
*
|
||||
* This function does not create any image files.
|
||||
+ *
|
||||
+ * The caller must hold the AioContext lock for @bs_top.
|
||||
*/
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
Error **errp)
|
||||
@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
int ret;
|
||||
BdrvChild *child;
|
||||
Transaction *tran = tran_new();
|
||||
+ AioContext *old_context, *new_context = NULL;
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
assert(!bs_new->backing);
|
||||
|
||||
+ old_context = bdrv_get_aio_context(bs_top);
|
||||
+
|
||||
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
|
||||
&child_of_bds, bdrv_backing_role(bs_new),
|
||||
tran, errp);
|
||||
@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * bdrv_attach_child_noperm could change the AioContext of bs_top.
|
||||
+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
|
||||
+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
|
||||
+ * that assumes the new lock is taken.
|
||||
+ */
|
||||
+ new_context = bdrv_get_aio_context(bs_top);
|
||||
+
|
||||
+ if (old_context != new_context) {
|
||||
+ aio_context_release(old_context);
|
||||
+ aio_context_acquire(new_context);
|
||||
+ }
|
||||
+
|
||||
ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
@@ -5306,6 +5324,11 @@ out:
|
||||
|
||||
bdrv_refresh_limits(bs_top, NULL, NULL);
|
||||
|
||||
+ if (new_context && old_context != new_context) {
|
||||
+ aio_context_release(new_context);
|
||||
+ aio_context_acquire(old_context);
|
||||
+ }
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,140 @@
|
||||
From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:26:55 +0200
|
||||
Subject: [PATCH 08/20] block/vmdk: Change extent info type
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
VMDK's implementation of .bdrv_get_specific_info() returns information
|
||||
about its extent files, ostensibly in the form of ImageInfo objects.
|
||||
However, it does not get this information through
|
||||
bdrv_query_image_info(), but fills only a select few fields with custom
|
||||
information that does not always match the fields' purposes.
|
||||
|
||||
For example, @format, which is supposed to be a block driver name, is
|
||||
filled with the extent type, e.g. SPARSE or FLAT.
|
||||
|
||||
In ImageInfo, @compressed shows whether the data that can be seen in the
|
||||
image is stored in compressed form or not. For example, a compressed
|
||||
qcow2 image will store compressed data in its data file, but when
|
||||
accessing the qcow2 node, you will see normal data. This is not how
|
||||
VMDK uses the @compressed field for its extent files: Instead, it
|
||||
signifies whether accessing the extent file will yield compressed data
|
||||
(which the VMDK driver then (de-)compresses).
|
||||
|
||||
Create a new structure to represent the extent information. This allows
|
||||
us to clarify the fields' meanings, and it clearly shows that these are
|
||||
not complete ImageInfo objects. (That is, if a user wants an extent
|
||||
file's ImageInfo object, they will need to query it separately, and will
|
||||
not get it from ImageInfoSpecificVmdk.extents.)
|
||||
|
||||
Note that this removes the last use of ['ImageInfo'] (i.e. an array of
|
||||
ImageInfo objects), so the QAPI generator will no longer generate
|
||||
ImageInfoList by default. However, we use it in qemu-img.c, so we need
|
||||
to create a dummy object to force the generator to create that type,
|
||||
similarly to DummyForceArrays in machine.json (introduced in commit
|
||||
9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array
|
||||
types")).
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-4-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/vmdk.c | 8 ++++----
|
||||
qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 41 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index 26376352b9..4435b9880b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
|
||||
return 1;
|
||||
}
|
||||
|
||||
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
|
||||
+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
|
||||
{
|
||||
- ImageInfo *info = g_new0(ImageInfo, 1);
|
||||
+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
|
||||
|
||||
bdrv_refresh_filename(extent->file->bs);
|
||||
- *info = (ImageInfo){
|
||||
+ *info = (VmdkExtentInfo){
|
||||
.filename = g_strdup(extent->file->bs->filename),
|
||||
.format = g_strdup(extent->type),
|
||||
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
|
||||
@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
|
||||
int i;
|
||||
BDRVVmdkState *s = bs->opaque;
|
||||
ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
|
||||
- ImageInfoList **tail;
|
||||
+ VmdkExtentInfoList **tail;
|
||||
|
||||
*spec_info = (ImageInfoSpecific){
|
||||
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index f5d822cbd6..4b9365167f 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -124,7 +124,33 @@
|
||||
'create-type': 'str',
|
||||
'cid': 'int',
|
||||
'parent-cid': 'int',
|
||||
- 'extents': ['ImageInfo']
|
||||
+ 'extents': ['VmdkExtentInfo']
|
||||
+ } }
|
||||
+
|
||||
+##
|
||||
+# @VmdkExtentInfo:
|
||||
+#
|
||||
+# Information about a VMDK extent file
|
||||
+#
|
||||
+# @filename: Name of the extent file
|
||||
+#
|
||||
+# @format: Extent type (e.g. FLAT or SPARSE)
|
||||
+#
|
||||
+# @virtual-size: Number of bytes covered by this extent
|
||||
+#
|
||||
+# @cluster-size: Cluster size in bytes (for non-flat extents)
|
||||
+#
|
||||
+# @compressed: Whether this extent contains compressed data
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'VmdkExtentInfo',
|
||||
+ 'data': {
|
||||
+ 'filename': 'str',
|
||||
+ 'format': 'str',
|
||||
+ 'virtual-size': 'int',
|
||||
+ '*cluster-size': 'int',
|
||||
+ '*compressed': 'bool'
|
||||
} }
|
||||
|
||||
##
|
||||
@@ -5754,3 +5780,13 @@
|
||||
'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
|
||||
'returns': 'SnapshotInfo',
|
||||
'allow-preconfig': true }
|
||||
+
|
||||
+##
|
||||
+# @DummyBlockCoreForceArrays:
|
||||
+#
|
||||
+# Not used by QMP; hack to let us use ImageInfoList internally
|
||||
+#
|
||||
+# Since: 8.0
|
||||
+##
|
||||
+{ 'struct': 'DummyBlockCoreForceArrays',
|
||||
+ 'data': { 'unused-image-info': ['ImageInfo'] } }
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Mon, 25 Apr 2022 13:42:46 +0800
|
||||
Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 80: Enable virtio-mem for aarch64
|
||||
RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9)
|
||||
RH-Bugzilla: 2044162
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162
|
||||
|
||||
This enables virtio-mem device on aarch64 since all needed commits
|
||||
are ready.
|
||||
|
||||
b1b87327a9 hw/arm/virt: Support for virtio-mem-pci
|
||||
1263615efe virtio-mem: Correct default THP size for ARM64
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
index 5f6ee1de5b..187938573f 100644
|
||||
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
@@ -22,6 +22,7 @@ CONFIG_VFIO=y
|
||||
CONFIG_VFIO_PCI=y
|
||||
CONFIG_VIRTIO_MMIO=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
+CONFIG_VIRTIO_MEM=y
|
||||
CONFIG_XIO3130=y
|
||||
CONFIG_NVDIMM=y
|
||||
CONFIG_ACPI_APEI=y
|
||||
--
|
||||
2.35.1
|
||||
|
@ -1,101 +0,0 @@
|
||||
From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Tue, 10 May 2022 17:10:19 +0200
|
||||
Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
|
||||
RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2079938
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
|
||||
It's true that these functions currently affect the batch size in which
|
||||
coroutines are reused (i.e. moved from the global release pool to the
|
||||
allocation pool of a specific thread), but this is a bug and will be
|
||||
fixed in a separate patch.
|
||||
|
||||
In fact, the comment in the header file already just promises that it
|
||||
influences the pool size, so reflect this in the name of the functions.
|
||||
As a nice side effect, the shorter function name makes some line
|
||||
wrapping unnecessary.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20220510151020.105528-2-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
hw/block/virtio-blk.c | 6 ++----
|
||||
include/qemu/coroutine.h | 6 +++---
|
||||
util/qemu-coroutine.c | 4 ++--
|
||||
3 files changed, 7 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
|
||||
index 540c38f829..6a1cc41877 100644
|
||||
--- a/hw/block/virtio-blk.c
|
||||
+++ b/hw/block/virtio-blk.c
|
||||
@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
|
||||
for (i = 0; i < conf->num_queues; i++) {
|
||||
virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
|
||||
}
|
||||
- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size
|
||||
- / 2);
|
||||
+ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
|
||||
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
|
||||
if (err != NULL) {
|
||||
error_propagate(errp, err);
|
||||
@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
|
||||
for (i = 0; i < conf->num_queues; i++) {
|
||||
virtio_del_queue(vdev, i);
|
||||
}
|
||||
- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size
|
||||
- / 2);
|
||||
+ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
|
||||
qemu_del_vm_change_state_handler(s->change);
|
||||
blockdev_mark_auto_del(s->blk);
|
||||
virtio_cleanup(vdev);
|
||||
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
|
||||
index c828a95ee0..5b621d1295 100644
|
||||
--- a/include/qemu/coroutine.h
|
||||
+++ b/include/qemu/coroutine.h
|
||||
@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd);
|
||||
/**
|
||||
* Increase coroutine pool size
|
||||
*/
|
||||
-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size);
|
||||
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
|
||||
|
||||
/**
|
||||
- * Devcrease coroutine pool size
|
||||
+ * Decrease coroutine pool size
|
||||
*/
|
||||
-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size);
|
||||
+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);
|
||||
|
||||
#include "qemu/lockable.h"
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index c03b2422ff..faca0ca97c 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
|
||||
return co->ctx;
|
||||
}
|
||||
|
||||
-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size)
|
||||
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
|
||||
{
|
||||
qatomic_add(&pool_batch_size, additional_pool_size);
|
||||
}
|
||||
|
||||
-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size)
|
||||
+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
|
||||
{
|
||||
qatomic_sub(&pool_batch_size, removing_pool_size);
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,138 +0,0 @@
|
||||
From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Tue, 10 May 2022 17:10:20 +0200
|
||||
Subject: [PATCH 08/16] coroutine: Revert to constant batch size
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
|
||||
RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2079938
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
|
||||
Commit 4c41c69e changed the way the coroutine pool is sized because for
|
||||
virtio-blk devices with a large queue size and heavy I/O, it was just
|
||||
too small and caused coroutines to be deleted and reallocated soon
|
||||
afterwards. The change made the size dynamic based on the number of
|
||||
queues and the queue size of virtio-blk devices.
|
||||
|
||||
There are two important numbers here: Slightly simplified, when a
|
||||
coroutine terminates, it is generally stored in the global release pool
|
||||
up to a certain pool size, and if the pool is full, it is freed.
|
||||
Conversely, when allocating a new coroutine, the coroutines in the
|
||||
release pool are reused if the pool already has reached a certain
|
||||
minimum size (the batch size), otherwise we allocate new coroutines.
|
||||
|
||||
The problem after commit 4c41c69e is that it not only increases the
|
||||
maximum pool size (which is the intended effect), but also the batch
|
||||
size for reusing coroutines (which is a bug). It means that in cases
|
||||
with many devices and/or a large queue size (which defaults to the
|
||||
number of vcpus for virtio-blk-pci), many thousand coroutines could be
|
||||
sitting in the release pool without being reused.
|
||||
|
||||
This is not only a waste of memory and allocations, but it actually
|
||||
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
|
||||
because each coroutine requires two mappings (its stack and the guard
|
||||
page for the stack), causing it to abort() in qemu_alloc_stack() because
|
||||
when the limit is hit, mprotect() starts to fail with ENOMEM.
|
||||
|
||||
In order to fix the problem, change the batch size back to 64 to avoid
|
||||
uselessly accumulating coroutines in the release pool, but keep the
|
||||
dynamic maximum pool size so that coroutines aren't freed too early
|
||||
in heavy I/O scenarios.
|
||||
|
||||
Note that this fix doesn't strictly make it impossible to hit the limit,
|
||||
but this would only happen if most of the coroutines are actually in use
|
||||
at the same time, not just sitting in a pool. This is the same behaviour
|
||||
as we already had before commit 4c41c69e. Fully preventing this would
|
||||
require allowing qemu_coroutine_create() to return an error, but it
|
||||
doesn't seem to be a scenario that people hit in practice.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
|
||||
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
|
||||
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)
|
||||
|
||||
Conflicts:
|
||||
util/qemu-coroutine.c
|
||||
|
||||
Trivial merge conflict because we don't have commit ac387a08 downstream.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 22 ++++++++++++++--------
|
||||
1 file changed, 14 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index faca0ca97c..804f672e0a 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -20,14 +20,20 @@
|
||||
#include "qemu/coroutine_int.h"
|
||||
#include "block/aio.h"
|
||||
|
||||
-/** Initial batch size is 64, and is increased on demand */
|
||||
+/**
|
||||
+ * The minimal batch size is always 64, coroutines from the release_pool are
|
||||
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
|
||||
+ * with 64 and is increased on demand so that coroutines are not deleted even if
|
||||
+ * they are not immediately reused.
|
||||
+ */
|
||||
enum {
|
||||
- POOL_INITIAL_BATCH_SIZE = 64,
|
||||
+ POOL_MIN_BATCH_SIZE = 64,
|
||||
+ POOL_INITIAL_MAX_SIZE = 64,
|
||||
};
|
||||
|
||||
/** Free list to speed up creation */
|
||||
static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
|
||||
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
|
||||
static unsigned int release_pool_size;
|
||||
static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
static __thread unsigned int alloc_pool_size;
|
||||
@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
if (CONFIG_COROUTINE_POOL) {
|
||||
co = QSLIST_FIRST(&alloc_pool);
|
||||
if (!co) {
|
||||
- if (release_pool_size > qatomic_read(&pool_batch_size)) {
|
||||
+ if (release_pool_size > POOL_MIN_BATCH_SIZE) {
|
||||
/* Slow path; a good place to register the destructor, too. */
|
||||
if (!coroutine_pool_cleanup_notifier.notify) {
|
||||
coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
|
||||
@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co)
|
||||
co->caller = NULL;
|
||||
|
||||
if (CONFIG_COROUTINE_POOL) {
|
||||
- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
|
||||
+ if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
|
||||
QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
|
||||
qatomic_inc(&release_pool_size);
|
||||
return;
|
||||
}
|
||||
- if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
|
||||
+ if (alloc_pool_size < qatomic_read(&pool_max_size)) {
|
||||
QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
|
||||
alloc_pool_size++;
|
||||
return;
|
||||
@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
|
||||
|
||||
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
|
||||
{
|
||||
- qatomic_add(&pool_batch_size, additional_pool_size);
|
||||
+ qatomic_add(&pool_max_size, additional_pool_size);
|
||||
}
|
||||
|
||||
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
|
||||
{
|
||||
- qatomic_sub(&pool_batch_size, removing_pool_size);
|
||||
+ qatomic_sub(&pool_max_size, removing_pool_size);
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,132 +0,0 @@
|
||||
From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 17 May 2022 12:07:56 +0100
|
||||
Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
|
||||
RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm)
|
||||
RH-Bugzilla: 1952483
|
||||
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
Thread-Local Storage variables cannot be used directly from coroutine
|
||||
code because the compiler may optimize TLS variable accesses across
|
||||
qemu_coroutine_yield() calls. When the coroutine is re-entered from
|
||||
another thread the TLS variables from the old thread must no longer be
|
||||
used.
|
||||
|
||||
Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20220307153853.602859-2-stefanha@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++--------------
|
||||
1 file changed, 24 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
|
||||
index 904b375192..127d5a13c8 100644
|
||||
--- a/util/coroutine-ucontext.c
|
||||
+++ b/util/coroutine-ucontext.c
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include <ucontext.h>
|
||||
#include "qemu/coroutine_int.h"
|
||||
+#include "qemu/coroutine-tls.h"
|
||||
|
||||
#ifdef CONFIG_VALGRIND_H
|
||||
#include <valgrind/valgrind.h>
|
||||
@@ -66,8 +67,8 @@ typedef struct {
|
||||
/**
|
||||
* Per-thread coroutine bookkeeping
|
||||
*/
|
||||
-static __thread CoroutineUContext leader;
|
||||
-static __thread Coroutine *current;
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader);
|
||||
|
||||
/*
|
||||
* va_args to makecontext() must be type 'int', so passing
|
||||
@@ -97,14 +98,15 @@ static inline __attribute__((always_inline))
|
||||
void finish_switch_fiber(void *fake_stack_save)
|
||||
{
|
||||
#ifdef CONFIG_ASAN
|
||||
+ CoroutineUContext *leaderp = get_ptr_leader();
|
||||
const void *bottom_old;
|
||||
size_t size_old;
|
||||
|
||||
__sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
|
||||
|
||||
- if (!leader.stack) {
|
||||
- leader.stack = (void *)bottom_old;
|
||||
- leader.stack_size = size_old;
|
||||
+ if (!leaderp->stack) {
|
||||
+ leaderp->stack = (void *)bottom_old;
|
||||
+ leaderp->stack_size = size_old;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_TSAN
|
||||
@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1)
|
||||
|
||||
/* Initialize longjmp environment and switch back the caller */
|
||||
if (!sigsetjmp(self->env, 0)) {
|
||||
- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
|
||||
- leader.stack_size);
|
||||
+ CoroutineUContext *leaderp = get_ptr_leader();
|
||||
+
|
||||
+ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save,
|
||||
+ leaderp->stack, leaderp->stack_size);
|
||||
start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
|
||||
siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
|
||||
}
|
||||
@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
|
||||
int ret;
|
||||
void *fake_stack_save = NULL;
|
||||
|
||||
- current = to_;
|
||||
+ set_current(to_);
|
||||
|
||||
ret = sigsetjmp(from->env, 0);
|
||||
if (ret == 0) {
|
||||
@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
|
||||
|
||||
Coroutine *qemu_coroutine_self(void)
|
||||
{
|
||||
- if (!current) {
|
||||
- current = &leader.base;
|
||||
+ Coroutine *self = get_current();
|
||||
+ CoroutineUContext *leaderp = get_ptr_leader();
|
||||
+
|
||||
+ if (!self) {
|
||||
+ self = &leaderp->base;
|
||||
+ set_current(self);
|
||||
}
|
||||
#ifdef CONFIG_TSAN
|
||||
- if (!leader.tsan_co_fiber) {
|
||||
- leader.tsan_co_fiber = __tsan_get_current_fiber();
|
||||
+ if (!leaderp->tsan_co_fiber) {
|
||||
+ leaderp->tsan_co_fiber = __tsan_get_current_fiber();
|
||||
}
|
||||
#endif
|
||||
- return current;
|
||||
+ return self;
|
||||
}
|
||||
|
||||
bool qemu_in_coroutine(void)
|
||||
{
|
||||
- return current && current->caller;
|
||||
+ Coroutine *self = get_current();
|
||||
+
|
||||
+ return self && self->caller;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,139 +0,0 @@
|
||||
From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 17 May 2022 12:08:04 +0100
|
||||
Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
|
||||
RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm)
|
||||
RH-Bugzilla: 1952483
|
||||
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
Thread-Local Storage variables cannot be used directly from coroutine
|
||||
code because the compiler may optimize TLS variable accesses across
|
||||
qemu_coroutine_yield() calls. When the coroutine is re-entered from
|
||||
another thread the TLS variables from the old thread must no longer be
|
||||
used.
|
||||
|
||||
Use QEMU_DEFINE_STATIC_CO_TLS() for the thread-local coroutine pool variables.
|
||||
The alloc_pool QSLIST needs a typedef so the return value of
|
||||
get_ptr_alloc_pool() can be stored in a local variable.
|
||||
|
||||
One example of why this code is necessary: a coroutine that yields
|
||||
before calling qemu_coroutine_create() to create another coroutine is
|
||||
affected by the TLS issue.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20220307153853.602859-3-stefanha@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90)
|
||||
|
||||
Conflicts:
|
||||
- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to
|
||||
constant batch size").
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 41 ++++++++++++++++++++++++-----------------
|
||||
1 file changed, 24 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index 804f672e0a..4a8bd63ef0 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "qemu/atomic.h"
|
||||
#include "qemu/coroutine.h"
|
||||
#include "qemu/coroutine_int.h"
|
||||
+#include "qemu/coroutine-tls.h"
|
||||
#include "block/aio.h"
|
||||
|
||||
/**
|
||||
@@ -35,17 +36,20 @@ enum {
|
||||
static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
|
||||
static unsigned int release_pool_size;
|
||||
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
-static __thread unsigned int alloc_pool_size;
|
||||
-static __thread Notifier coroutine_pool_cleanup_notifier;
|
||||
+
|
||||
+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
|
||||
|
||||
static void coroutine_pool_cleanup(Notifier *n, void *value)
|
||||
{
|
||||
Coroutine *co;
|
||||
Coroutine *tmp;
|
||||
+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
|
||||
- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
|
||||
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
|
||||
+ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
|
||||
+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
qemu_coroutine_delete(co);
|
||||
}
|
||||
}
|
||||
@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
Coroutine *co = NULL;
|
||||
|
||||
if (CONFIG_COROUTINE_POOL) {
|
||||
- co = QSLIST_FIRST(&alloc_pool);
|
||||
+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
+
|
||||
+ co = QSLIST_FIRST(alloc_pool);
|
||||
if (!co) {
|
||||
if (release_pool_size > POOL_MIN_BATCH_SIZE) {
|
||||
/* Slow path; a good place to register the destructor, too. */
|
||||
- if (!coroutine_pool_cleanup_notifier.notify) {
|
||||
- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
|
||||
- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
|
||||
+ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
|
||||
+ if (!notifier->notify) {
|
||||
+ notifier->notify = coroutine_pool_cleanup;
|
||||
+ qemu_thread_atexit_add(notifier);
|
||||
}
|
||||
|
||||
/* This is not exact; there could be a little skew between
|
||||
* release_pool_size and the actual size of release_pool. But
|
||||
* it is just a heuristic, it does not need to be perfect.
|
||||
*/
|
||||
- alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
|
||||
- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
|
||||
- co = QSLIST_FIRST(&alloc_pool);
|
||||
+ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
|
||||
+ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
|
||||
+ co = QSLIST_FIRST(alloc_pool);
|
||||
}
|
||||
}
|
||||
if (co) {
|
||||
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
|
||||
- alloc_pool_size--;
|
||||
+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
+ set_alloc_pool_size(get_alloc_pool_size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co)
|
||||
qatomic_inc(&release_pool_size);
|
||||
return;
|
||||
}
|
||||
- if (alloc_pool_size < qatomic_read(&pool_max_size)) {
|
||||
- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
|
||||
- alloc_pool_size++;
|
||||
+ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
|
||||
+ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
|
||||
+ set_alloc_pool_size(get_alloc_pool_size() + 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,99 +0,0 @@
|
||||
From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 17 May 2022 12:08:12 +0100
|
||||
Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
|
||||
RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm)
|
||||
RH-Bugzilla: 1952483
|
||||
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
Thread-Local Storage variables cannot be used directly from coroutine
|
||||
code because the compiler may optimize TLS variable accesses across
|
||||
qemu_coroutine_yield() calls. When the coroutine is re-entered from
|
||||
another thread the TLS variables from the old thread must no longer be
|
||||
used.
|
||||
|
||||
Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables.
|
||||
|
||||
I think coroutine-win32.c could get away with __thread because the
|
||||
variables are only used in situations where either the stale value is
|
||||
correct (current) or outside coroutine context (loading leader when
|
||||
current is NULL). Due to the difficulty of being sure that this is
|
||||
really safe in all scenarios it seems worth converting it anyway.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20220307153853.602859-4-stefanha@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/coroutine-win32.c | 18 +++++++++++++-----
|
||||
1 file changed, 13 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
|
||||
index de6bd4fd3e..c02a62c896 100644
|
||||
--- a/util/coroutine-win32.c
|
||||
+++ b/util/coroutine-win32.c
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu-common.h"
|
||||
#include "qemu/coroutine_int.h"
|
||||
+#include "qemu/coroutine-tls.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -34,8 +35,8 @@ typedef struct
|
||||
CoroutineAction action;
|
||||
} CoroutineWin32;
|
||||
|
||||
-static __thread CoroutineWin32 leader;
|
||||
-static __thread Coroutine *current;
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
|
||||
|
||||
/* This function is marked noinline to prevent GCC from inlining it
|
||||
* into coroutine_trampoline(). If we allow it to do that then it
|
||||
@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
|
||||
CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
|
||||
CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
|
||||
|
||||
- current = to_;
|
||||
+ set_current(to_);
|
||||
|
||||
to->action = action;
|
||||
SwitchToFiber(to->fiber);
|
||||
@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_)
|
||||
|
||||
Coroutine *qemu_coroutine_self(void)
|
||||
{
|
||||
+ Coroutine *current = get_current();
|
||||
+
|
||||
if (!current) {
|
||||
- current = &leader.base;
|
||||
- leader.fiber = ConvertThreadToFiber(NULL);
|
||||
+ CoroutineWin32 *leader = get_ptr_leader();
|
||||
+
|
||||
+ current = &leader->base;
|
||||
+ set_current(current);
|
||||
+ leader->fiber = ConvertThreadToFiber(NULL);
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
bool qemu_in_coroutine(void)
|
||||
{
|
||||
+ Coroutine *current = get_current();
|
||||
+
|
||||
return current && current->caller;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,127 @@
|
||||
From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:17 -0500
|
||||
Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
|
||||
race
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
|
||||
RH-Bugzilla: 2155748
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
|
||||
means the rest of dma_blk_cb() is not protected. In particular, the
|
||||
DMAAIOCB field accesses happen outside the lock.
|
||||
|
||||
There is a race when the main loop thread holds the AioContext lock and
|
||||
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
|
||||
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
|
||||
field determines how cancellation proceeds. If dma_aio_cancel() sees
|
||||
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
|
||||
completed twice (-ECANCELED and the actual return value).
|
||||
|
||||
The following assertion can occur with virtio-scsi when an IOThread is
|
||||
used:
|
||||
|
||||
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
|
||||
|
||||
Fix the race by holding the AioContext lock across dma_blk_cb(). Now
|
||||
dma_aio_cancel() under the AioContext lock will not see
|
||||
inconsistent/intermediate states.
|
||||
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 4 +---
|
||||
softmmu/dma-helpers.c | 12 +++++++-----
|
||||
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index 5327f93f4c..b12d8b0816 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -354,13 +354,12 @@ done:
|
||||
scsi_req_unref(&r->req);
|
||||
}
|
||||
|
||||
+/* Called with AioContext lock held */
|
||||
static void scsi_dma_complete(void *opaque, int ret)
|
||||
{
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
-
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
}
|
||||
scsi_dma_complete_noio(r, ret);
|
||||
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||
}
|
||||
|
||||
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
|
||||
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
|
||||
index 7820fec54c..2463964805 100644
|
||||
--- a/softmmu/dma-helpers.c
|
||||
+++ b/softmmu/dma-helpers.c
|
||||
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
|
||||
static void dma_blk_cb(void *opaque, int ret)
|
||||
{
|
||||
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
||||
+ AioContext *ctx = dbs->ctx;
|
||||
dma_addr_t cur_addr, cur_len;
|
||||
void *mem;
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
+ aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
if (dbs->iov.size == 0) {
|
||||
trace_dma_map_wait(dbs);
|
||||
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
||||
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
||||
}
|
||||
|
||||
- aio_context_acquire(dbs->ctx);
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
- aio_context_release(dbs->ctx);
|
||||
assert(dbs->acb);
|
||||
+out:
|
||||
+ aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,61 @@
|
||||
From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 07/12] edu: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:16:13 2023 +0100
|
||||
|
||||
edu: add smp_mb__after_rmw()
|
||||
|
||||
Ensure ordering between clearing the COMPUTING flag and checking
|
||||
IRQFACT, and between setting the IRQFACT flag and checking
|
||||
COMPUTING. This ensures that no wakeups are lost.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
hw/misc/edu.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
|
||||
index e935c418d4..a1f8bc77e7 100644
|
||||
--- a/hw/misc/edu.c
|
||||
+++ b/hw/misc/edu.c
|
||||
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
|
||||
case 0x20:
|
||||
if (val & EDU_STATUS_IRQFACT) {
|
||||
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
|
||||
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
} else {
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
|
||||
}
|
||||
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
|
||||
qemu_mutex_unlock(&edu->thr_mutex);
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
|
||||
|
||||
+ /* Clear COMPUTING flag before checking IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
|
||||
qemu_mutex_lock_iothread();
|
||||
edu_raise_irq(edu, FACT_IRQ);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,179 +0,0 @@
|
||||
From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 11 May 2022 18:01:35 +0800
|
||||
Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build
|
||||
PPTT table
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
|
||||
RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9)
|
||||
RH-Bugzilla: 2041823
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
|
||||
|
||||
When the PPTT table is built, the CPU topology is re-calculated, but
|
||||
it's unnecessary because the CPU topology has been populated in
|
||||
virt_possible_cpu_arch_ids() on arm/virt machine.
|
||||
|
||||
This reworks build_pptt() to avoid that by reusing the existing IDs in
|
||||
ms->possible_cpus. Currently, the only user of build_pptt() is
|
||||
arm/virt machine.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Tested-by: Yanan Wang <wangyanan55@huawei.com>
|
||||
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Message-id: 20220503140304.855514-7-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/acpi/aml-build.c | 111 +++++++++++++++++++-------------------------
|
||||
1 file changed, 48 insertions(+), 63 deletions(-)
|
||||
|
||||
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
|
||||
index 4086879ebf..e6bfac95c7 100644
|
||||
--- a/hw/acpi/aml-build.c
|
||||
+++ b/hw/acpi/aml-build.c
|
||||
@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
|
||||
const char *oem_id, const char *oem_table_id)
|
||||
{
|
||||
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||
- GQueue *list = g_queue_new();
|
||||
- guint pptt_start = table_data->len;
|
||||
- guint parent_offset;
|
||||
- guint length, i;
|
||||
- int uid = 0;
|
||||
- int socket;
|
||||
+ CPUArchIdList *cpus = ms->possible_cpus;
|
||||
+ int64_t socket_id = -1, cluster_id = -1, core_id = -1;
|
||||
+ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
|
||||
+ uint32_t pptt_start = table_data->len;
|
||||
+ int n;
|
||||
AcpiTable table = { .sig = "PPTT", .rev = 2,
|
||||
.oem_id = oem_id, .oem_table_id = oem_table_id };
|
||||
|
||||
acpi_table_begin(&table, table_data);
|
||||
|
||||
- for (socket = 0; socket < ms->smp.sockets; socket++) {
|
||||
- g_queue_push_tail(list,
|
||||
- GUINT_TO_POINTER(table_data->len - pptt_start));
|
||||
- build_processor_hierarchy_node(
|
||||
- table_data,
|
||||
- /*
|
||||
- * Physical package - represents the boundary
|
||||
- * of a physical package
|
||||
- */
|
||||
- (1 << 0),
|
||||
- 0, socket, NULL, 0);
|
||||
- }
|
||||
-
|
||||
- if (mc->smp_props.clusters_supported) {
|
||||
- length = g_queue_get_length(list);
|
||||
- for (i = 0; i < length; i++) {
|
||||
- int cluster;
|
||||
-
|
||||
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
|
||||
- for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
|
||||
- g_queue_push_tail(list,
|
||||
- GUINT_TO_POINTER(table_data->len - pptt_start));
|
||||
- build_processor_hierarchy_node(
|
||||
- table_data,
|
||||
- (0 << 0), /* not a physical package */
|
||||
- parent_offset, cluster, NULL, 0);
|
||||
- }
|
||||
+ /*
|
||||
+ * This works with the assumption that cpus[n].props.*_id has been
|
||||
+ * sorted from top to down levels in mc->possible_cpu_arch_ids().
|
||||
+ * Otherwise, the unexpected and duplicated containers will be
|
||||
+ * created.
|
||||
+ */
|
||||
+ for (n = 0; n < cpus->len; n++) {
|
||||
+ if (cpus->cpus[n].props.socket_id != socket_id) {
|
||||
+ assert(cpus->cpus[n].props.socket_id > socket_id);
|
||||
+ socket_id = cpus->cpus[n].props.socket_id;
|
||||
+ cluster_id = -1;
|
||||
+ core_id = -1;
|
||||
+ socket_offset = table_data->len - pptt_start;
|
||||
+ build_processor_hierarchy_node(table_data,
|
||||
+ (1 << 0), /* Physical package */
|
||||
+ 0, socket_id, NULL, 0);
|
||||
}
|
||||
- }
|
||||
|
||||
- length = g_queue_get_length(list);
|
||||
- for (i = 0; i < length; i++) {
|
||||
- int core;
|
||||
-
|
||||
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
|
||||
- for (core = 0; core < ms->smp.cores; core++) {
|
||||
- if (ms->smp.threads > 1) {
|
||||
- g_queue_push_tail(list,
|
||||
- GUINT_TO_POINTER(table_data->len - pptt_start));
|
||||
- build_processor_hierarchy_node(
|
||||
- table_data,
|
||||
- (0 << 0), /* not a physical package */
|
||||
- parent_offset, core, NULL, 0);
|
||||
- } else {
|
||||
- build_processor_hierarchy_node(
|
||||
- table_data,
|
||||
- (1 << 1) | /* ACPI Processor ID valid */
|
||||
- (1 << 3), /* Node is a Leaf */
|
||||
- parent_offset, uid++, NULL, 0);
|
||||
+ if (mc->smp_props.clusters_supported) {
|
||||
+ if (cpus->cpus[n].props.cluster_id != cluster_id) {
|
||||
+ assert(cpus->cpus[n].props.cluster_id > cluster_id);
|
||||
+ cluster_id = cpus->cpus[n].props.cluster_id;
|
||||
+ core_id = -1;
|
||||
+ cluster_offset = table_data->len - pptt_start;
|
||||
+ build_processor_hierarchy_node(table_data,
|
||||
+ (0 << 0), /* Not a physical package */
|
||||
+ socket_offset, cluster_id, NULL, 0);
|
||||
}
|
||||
+ } else {
|
||||
+ cluster_offset = socket_offset;
|
||||
}
|
||||
- }
|
||||
|
||||
- length = g_queue_get_length(list);
|
||||
- for (i = 0; i < length; i++) {
|
||||
- int thread;
|
||||
+ if (ms->smp.threads == 1) {
|
||||
+ build_processor_hierarchy_node(table_data,
|
||||
+ (1 << 1) | /* ACPI Processor ID valid */
|
||||
+ (1 << 3), /* Node is a Leaf */
|
||||
+ cluster_offset, n, NULL, 0);
|
||||
+ } else {
|
||||
+ if (cpus->cpus[n].props.core_id != core_id) {
|
||||
+ assert(cpus->cpus[n].props.core_id > core_id);
|
||||
+ core_id = cpus->cpus[n].props.core_id;
|
||||
+ core_offset = table_data->len - pptt_start;
|
||||
+ build_processor_hierarchy_node(table_data,
|
||||
+ (0 << 0), /* Not a physical package */
|
||||
+ cluster_offset, core_id, NULL, 0);
|
||||
+ }
|
||||
|
||||
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
|
||||
- for (thread = 0; thread < ms->smp.threads; thread++) {
|
||||
- build_processor_hierarchy_node(
|
||||
- table_data,
|
||||
+ build_processor_hierarchy_node(table_data,
|
||||
(1 << 1) | /* ACPI Processor ID valid */
|
||||
(1 << 2) | /* Processor is a Thread */
|
||||
(1 << 3), /* Node is a Leaf */
|
||||
- parent_offset, uid++, NULL, 0);
|
||||
+ core_offset, n, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
- g_queue_free(list);
|
||||
acpi_table_end(linker, &table);
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,169 @@
|
||||
From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
After the improvement to high memory region address assignment is
|
||||
applied, the memory layout can be changed, introducing possible
|
||||
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
|
||||
is disabled or enabled when the optimization is applied or not, with
|
||||
the following configuration. The configuration is only achievable by
|
||||
modifying the source code until more properties are added to allow
|
||||
users to selectively disable those high memory regions.
|
||||
|
||||
pa_bits = 40;
|
||||
vms->highmem_redists = false;
|
||||
vms->highmem_ecam = false;
|
||||
vms->highmem_mmio = true;
|
||||
|
||||
# qemu-system-aarch64 -accel kvm -cpu host \
|
||||
-machine virt-7.2,compact-highmem={on, off} \
|
||||
-m 4G,maxmem=511G -monitor stdio
|
||||
|
||||
Region compact-highmem=off compact-highmem=on
|
||||
----------------------------------------------------------------
|
||||
MEM [1GB 512GB] [1GB 512GB]
|
||||
HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled]
|
||||
HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled]
|
||||
HIGH_PCIE_MMIO [disabled] [512GB 1TB]
|
||||
|
||||
In order to keep backward compatibility, we need to disable the
|
||||
optimization on machine types virt-7.1 and earlier. It
|
||||
means the optimization is enabled by default from virt-7.2. Besides,
|
||||
'compact-highmem' property is added so that the optimization can be
|
||||
explicitly enabled or disabled on all machine types by users.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-7-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Conflicts:
|
||||
hw/arm/virt.c
|
||||
Comment out the handlers of property 'compact-highmem' since
|
||||
the property isn't exposed.
|
||||
---
|
||||
docs/system/arm/virt.rst | 4 ++++
|
||||
hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++
|
||||
include/hw/arm/virt.h | 1 +
|
||||
3 files changed, 39 insertions(+)
|
||||
|
||||
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
|
||||
index 20442ea2c1..4454706392 100644
|
||||
--- a/docs/system/arm/virt.rst
|
||||
+++ b/docs/system/arm/virt.rst
|
||||
@@ -94,6 +94,10 @@ highmem
|
||||
address space above 32 bits. The default is ``on`` for machine types
|
||||
later than ``virt-2.12``.
|
||||
|
||||
+compact-highmem
|
||||
+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
|
||||
+ The default is ``on`` for machine types later than ``virt-7.2``.
|
||||
+
|
||||
gic-version
|
||||
Specify the version of the Generic Interrupt Controller (GIC) to provide.
|
||||
Valid values are:
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6896e0ca0f..6087511ae9 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = {
|
||||
* Note the extended_memmap is sized so that it eventually also includes the
|
||||
* base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
|
||||
* index of base_memmap).
|
||||
+ *
|
||||
+ * The memory map for these Highmem IO Regions can be in legacy or compact
|
||||
+ * layout, depending on 'compact-highmem' property. With legacy layout, the
|
||||
+ * PA space for one specific region is always reserved, even if the region
|
||||
+ * has been disabled or doesn't fit into the PA space. However, the PA space
|
||||
+ * for the region won't be reserved in these circumstances with compact layout.
|
||||
*/
|
||||
static MemMapEntry extended_memmap[] = {
|
||||
/* Additional 64 MB redist region (can contain up to 512 redistributors) */
|
||||
@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
|
||||
vms->highmem = value;
|
||||
}
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
+static bool virt_get_compact_highmem(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_compact;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_compact = value;
|
||||
+}
|
||||
+#endif /* disabled for RHEL */
|
||||
+
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
{
|
||||
VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable using "
|
||||
"physical address space above 32 bits");
|
||||
|
||||
+ object_class_property_add_bool(oc, "compact-highmem",
|
||||
+ virt_get_compact_highmem,
|
||||
+ virt_set_compact_highmem);
|
||||
+ object_class_property_set_description(oc, "compact-highmem",
|
||||
+ "Set on/off to enable/disable compact "
|
||||
+ "layout for high memory regions");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj)
|
||||
|
||||
/* High memory is enabled by default */
|
||||
vms->highmem = true;
|
||||
+ vms->highmem_compact = !vmc->no_highmem_compact;
|
||||
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
|
||||
|
||||
vms->highmem_ecam = !vmc->no_highmem_ecam;
|
||||
@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2)
|
||||
|
||||
static void virt_machine_7_1_options(MachineClass *mc)
|
||||
{
|
||||
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
|
||||
+
|
||||
virt_machine_7_2_options(mc);
|
||||
compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
|
||||
+ /* Compact layout for high memory regions was introduced with 7.2 */
|
||||
+ vmc->no_highmem_compact = true;
|
||||
}
|
||||
DEFINE_VIRT_MACHINE(7, 1)
|
||||
|
||||
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
|
||||
index 15bd291311..85e7d61868 100644
|
||||
--- a/include/hw/arm/virt.h
|
||||
+++ b/include/hw/arm/virt.h
|
||||
@@ -125,6 +125,7 @@ struct VirtMachineClass {
|
||||
bool no_pmu;
|
||||
bool claim_edge_triggered_timers;
|
||||
bool smbios_old_sys_ver;
|
||||
+ bool no_highmem_compact;
|
||||
bool no_highmem_ecam;
|
||||
bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */
|
||||
bool kvm_no_adjvtime;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,179 @@
|
||||
From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory
|
||||
regions
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
The 3 high memory regions are usually enabled by default, but they may
|
||||
not be used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2.
|
||||
This leads to waste in the PA space.
|
||||
|
||||
Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to
|
||||
allow users to selectively disable them if needed. After that, the high
|
||||
memory region for GICv3 or GICv4 redistributor can be disabled by user,
|
||||
the number of maximal supported CPUs needs to be calculated based on
|
||||
'vms->highmem_redists'. The follow-up error message is also improved
|
||||
to indicate if the high memory region for GICv3 and GICv4 has been
|
||||
enabled or not.
|
||||
|
||||
Suggested-by: Marc Zyngier <maz@kernel.org>
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20221029224307.138822-8-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Conflicts:
|
||||
hw/arm/virt.c
|
||||
Comment out the handlers of the property 'highmem-redists',
|
||||
'highmem-ecam' and 'highmem-mmio' since they aren't exposed.
|
||||
---
|
||||
docs/system/arm/virt.rst | 13 +++++++
|
||||
hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 86 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
|
||||
index 4454706392..188a4f211f 100644
|
||||
--- a/docs/system/arm/virt.rst
|
||||
+++ b/docs/system/arm/virt.rst
|
||||
@@ -98,6 +98,19 @@ compact-highmem
|
||||
Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
|
||||
The default is ``on`` for machine types later than ``virt-7.2``.
|
||||
|
||||
+highmem-redists
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or
|
||||
+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will
|
||||
+ limit the maximum number of CPUs when GICv3 or GICv4 is used.
|
||||
+
|
||||
+highmem-ecam
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM.
|
||||
+ The default is ``on`` for machine types later than ``virt-3.0``.
|
||||
+
|
||||
+highmem-mmio
|
||||
+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
|
||||
+ The default is ``on``.
|
||||
+
|
||||
gic-version
|
||||
Specify the version of the Generic Interrupt Controller (GIC) to provide.
|
||||
Valid values are:
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6087511ae9..304fa0d6e7 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine)
|
||||
if (vms->gic_version == VIRT_GIC_VERSION_2) {
|
||||
virt_max_cpus = GIC_NCPU;
|
||||
} else {
|
||||
- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) +
|
||||
- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
|
||||
+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
|
||||
+ if (vms->highmem_redists) {
|
||||
+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
|
||||
+ }
|
||||
}
|
||||
|
||||
if (max_cpus > virt_max_cpus) {
|
||||
error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
|
||||
"supported by machine 'mach-virt' (%d)",
|
||||
max_cpus, virt_max_cpus);
|
||||
+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
|
||||
+ error_printf("Try 'highmem-redists=on' for more CPUs\n");
|
||||
+ }
|
||||
+
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_compact = value;
|
||||
}
|
||||
+
|
||||
+static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_redists;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_redists = value;
|
||||
+}
|
||||
+
|
||||
+static bool virt_get_highmem_ecam(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_ecam;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_ecam = value;
|
||||
+}
|
||||
+
|
||||
+static bool virt_get_highmem_mmio(Object *obj, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ return vms->highmem_mmio;
|
||||
+}
|
||||
+
|
||||
+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
+
|
||||
+ vms->highmem_mmio = value;
|
||||
+}
|
||||
+
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable compact "
|
||||
"layout for high memory regions");
|
||||
|
||||
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||
+ virt_get_highmem_redists,
|
||||
+ virt_set_highmem_redists);
|
||||
+ object_class_property_set_description(oc, "highmem-redists",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for GICv3 or GICv4 "
|
||||
+ "redistributor");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||
+ virt_get_highmem_ecam,
|
||||
+ virt_set_highmem_ecam);
|
||||
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI ECAM");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||
+ virt_get_highmem_mmio,
|
||||
+ virt_set_highmem_mmio);
|
||||
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI MMIO");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,74 +0,0 @@
|
||||
From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 11 May 2022 18:01:35 +0800
|
||||
Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
|
||||
RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9)
|
||||
RH-Bugzilla: 2041823
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
|
||||
|
||||
Currently, the SMP configuration isn't considered when the CPU
|
||||
topology is populated. In this case, it's impossible to provide
|
||||
the default CPU-to-NUMA mapping or association based on the socket
|
||||
ID of the given CPU.
|
||||
|
||||
This takes account of SMP configuration when the CPU topology
|
||||
is populated. The die ID for the given CPU isn't assigned since
|
||||
it's not supported on arm/virt machine. Besides, the used SMP
|
||||
configuration in qtest/numa-test/aarch64_numa_cpu() is corrected
|
||||
to avoid testing failure
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-id: 20220503140304.855514-4-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 15 ++++++++++++++-
|
||||
1 file changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 8be12e121d..a87c8d396a 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
||||
int n;
|
||||
unsigned int max_cpus = ms->smp.max_cpus;
|
||||
VirtMachineState *vms = VIRT_MACHINE(ms);
|
||||
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
|
||||
|
||||
if (ms->possible_cpus) {
|
||||
assert(ms->possible_cpus->len == max_cpus);
|
||||
@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
||||
ms->possible_cpus->cpus[n].type = ms->cpu_type;
|
||||
ms->possible_cpus->cpus[n].arch_id =
|
||||
virt_cpu_mp_affinity(vms, n);
|
||||
+
|
||||
+ assert(!mc->smp_props.dies_supported);
|
||||
+ ms->possible_cpus->cpus[n].props.has_socket_id = true;
|
||||
+ ms->possible_cpus->cpus[n].props.socket_id =
|
||||
+ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads);
|
||||
+ ms->possible_cpus->cpus[n].props.has_cluster_id = true;
|
||||
+ ms->possible_cpus->cpus[n].props.cluster_id =
|
||||
+ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters;
|
||||
+ ms->possible_cpus->cpus[n].props.has_core_id = true;
|
||||
+ ms->possible_cpus->cpus[n].props.core_id =
|
||||
+ (n / ms->smp.threads) % ms->smp.cores;
|
||||
ms->possible_cpus->cpus[n].props.has_thread_id = true;
|
||||
- ms->possible_cpus->cpus[n].props.thread_id = n;
|
||||
+ ms->possible_cpus->cpus[n].props.thread_id =
|
||||
+ n % ms->smp.threads;
|
||||
}
|
||||
return ms->possible_cpus;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,51 @@
|
||||
From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address
|
||||
assignment for 9.2.0 machine
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
Upstream: RHEL only
|
||||
|
||||
The compact high memory region address assignment is enabled for 9.2.0,
|
||||
but it's kept as disabled for 9.0.0, to keep the backwards compatibility
|
||||
on 9.0.0. Note that these newly added properties ('compact-highmem',
|
||||
'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream
|
||||
aren't exposed for the downstream.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 304fa0d6e7..e41c0b462c 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj)
|
||||
|
||||
/* High memory is enabled by default */
|
||||
vms->highmem = true;
|
||||
+ vms->highmem_compact = !vmc->no_highmem_compact;
|
||||
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
|
||||
|
||||
vms->highmem_ecam = !vmc->no_highmem_ecam;
|
||||
@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc)
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
|
||||
+ vmc->no_highmem_compact = true;
|
||||
}
|
||||
DEFINE_RHEL_MACHINE(9, 0, 0)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,88 +0,0 @@
|
||||
From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 11 May 2022 18:01:35 +0800
|
||||
Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
|
||||
RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9)
|
||||
RH-Bugzilla: 2041823
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
|
||||
|
||||
When CPU-to-NUMA association isn't explicitly provided by users,
|
||||
the default one is given by mc->get_default_cpu_node_id(). However,
|
||||
the CPU topology isn't fully considered in the default association
|
||||
and this causes CPU topology broken warnings on booting Linux guest.
|
||||
|
||||
For example, the following warning messages are observed when the
|
||||
Linux guest is booted with the following command lines.
|
||||
|
||||
/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \
|
||||
-accel kvm -machine virt,gic-version=host \
|
||||
-cpu host \
|
||||
-smp 6,sockets=2,cores=3,threads=1 \
|
||||
-m 1024M,slots=16,maxmem=64G \
|
||||
-object memory-backend-ram,id=mem0,size=128M \
|
||||
-object memory-backend-ram,id=mem1,size=128M \
|
||||
-object memory-backend-ram,id=mem2,size=128M \
|
||||
-object memory-backend-ram,id=mem3,size=128M \
|
||||
-object memory-backend-ram,id=mem4,size=128M \
|
||||
-object memory-backend-ram,id=mem5,size=384M \
|
||||
-numa node,nodeid=0,memdev=mem0 \
|
||||
-numa node,nodeid=1,memdev=mem1 \
|
||||
-numa node,nodeid=2,memdev=mem2 \
|
||||
-numa node,nodeid=3,memdev=mem3 \
|
||||
-numa node,nodeid=4,memdev=mem4 \
|
||||
-numa node,nodeid=5,memdev=mem5
|
||||
:
|
||||
alternatives: patching kernel code
|
||||
BUG: arch topology borken
|
||||
the CLS domain not a subset of the MC domain
|
||||
<the above error log repeats>
|
||||
BUG: arch topology borken
|
||||
the DIE domain not a subset of the NODE domain
|
||||
|
||||
With current implementation of mc->get_default_cpu_node_id(),
|
||||
CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately.
|
||||
That's incorrect because CPU#0/1/2 should be associated with same
|
||||
NUMA node because they're seated in same socket.
|
||||
|
||||
This fixes the issue by considering the socket ID when the default
|
||||
CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids().
|
||||
With this applied, no more CPU topology broken warnings are seen
|
||||
from the Linux guest. The 6 CPUs are associated with NODE#0/1, but
|
||||
there are no CPUs associated with NODE#2/3/4/5.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
|
||||
Message-id: 20220503140304.855514-6-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index a87c8d396a..95d012d6eb 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||
|
||||
static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
{
|
||||
- return idx % ms->numa_state->num_nodes;
|
||||
+ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id;
|
||||
+
|
||||
+ return socket_id % ms->numa_state->num_nodes;
|
||||
}
|
||||
|
||||
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,56 +0,0 @@
|
||||
From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Wed, 4 May 2022 11:11:54 +0200
|
||||
Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in
|
||||
instance/class_init()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option
|
||||
RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2046029
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161
|
||||
Upstream Status: RHEL-only
|
||||
Tested: Boot RHEL guest and check migration from 8.6 to 9.1
|
||||
(with custom additions)
|
||||
|
||||
During the 7.0 rebase, the initialization of highmem_mmio and
|
||||
highmem_redists was forgotten in rhel_virt_instance_init().
|
||||
Fix it to match virt_instance_init() code.
|
||||
|
||||
Also mc->smp_props.clusters_supported was missing in
|
||||
rhel_machine_class_init().
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index bde4f77994..8be12e121d 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
hc->unplug_request = virt_machine_device_unplug_request_cb;
|
||||
hc->unplug = virt_machine_device_unplug_cb;
|
||||
mc->nvdimm_supported = true;
|
||||
+ mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj)
|
||||
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
|
||||
|
||||
vms->highmem_ecam = !vmc->no_highmem_ecam;
|
||||
+ vms->highmem_mmio = true;
|
||||
+ vms->highmem_redists = true;
|
||||
|
||||
if (vmc->no_its) {
|
||||
vms->its = false;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,112 @@
|
||||
From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address
|
||||
assignment
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
There are three high memory regions, which are VIRT_HIGH_REDIST2,
|
||||
VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
|
||||
are floating on highest RAM address. However, they can be disabled
|
||||
in several cases.
|
||||
|
||||
(1) One specific high memory region is likely to be disabled by
|
||||
code by toggling vms->highmem_{redists, ecam, mmio}.
|
||||
|
||||
(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
|
||||
'virt-2.12' or earlier.
|
||||
|
||||
(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
|
||||
on a 32-bit system.
|
||||
|
||||
(4) One specific high memory region is disabled when it breaks the
|
||||
PA space limit.
|
||||
|
||||
The current implementation of virt_set_{memmap, high_memmap}() isn't
|
||||
optimized because the high memory region's PA space is always reserved,
|
||||
regardless of whatever the actual state in the corresponding
|
||||
vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and
|
||||
'vms->highest_gpa' are always increased for case (1), (2) and (3).
|
||||
It's unnecessary since the assigned PA space for the disabled high
|
||||
memory region won't be used afterwards.
|
||||
|
||||
Improve the address assignment for those three high memory region by
|
||||
skipping the address assignment for one specific high memory region if
|
||||
it has been disabled in case (1), (2) and (3). The memory layout may
|
||||
be changed after the improvement is applied, which leads to potential
|
||||
migration breakage. So 'vms->highmem_compact' is added to control if
|
||||
the improvement should be applied. For now, 'vms->highmem_compact' is
|
||||
set to false, meaning that we don't have memory layout change until it
|
||||
becomes configurable through property 'compact-highmem' in next patch.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-6-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 15 ++++++++++-----
|
||||
include/hw/arm/virt.h | 1 +
|
||||
2 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 6e3b9fc060..6896e0ca0f 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
- * Check each device to see if they fit in the PA space,
|
||||
- * moving highest_gpa as we go.
|
||||
+ * Check each device to see if it fits in the PA space,
|
||||
+ * moving highest_gpa as we go. For compatibility, move
|
||||
+ * highest_gpa for disabled fitting devices as well, if
|
||||
+ * the compact layout has been disabled.
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
fits = (region_base + region_size) <= BIT_ULL(pa_bits);
|
||||
- if (fits) {
|
||||
- vms->highest_gpa = region_base + region_size - 1;
|
||||
+ *region_enabled &= fits;
|
||||
+ if (vms->highmem_compact && !*region_enabled) {
|
||||
+ continue;
|
||||
}
|
||||
|
||||
- *region_enabled &= fits;
|
||||
base = region_base + region_size;
|
||||
+ if (fits) {
|
||||
+ vms->highest_gpa = base - 1;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
|
||||
index 22b54ec510..15bd291311 100644
|
||||
--- a/include/hw/arm/virt.h
|
||||
+++ b/include/hw/arm/virt.h
|
||||
@@ -144,6 +144,7 @@ struct VirtMachineState {
|
||||
PFlashCFI01 *flash[2];
|
||||
bool secure;
|
||||
bool highmem;
|
||||
+ bool highmem_compact;
|
||||
bool highmem_ecam;
|
||||
bool highmem_mmio;
|
||||
bool highmem_redists;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,82 @@
|
||||
From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in
|
||||
virt_set_high_memmap()
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces variable 'region_base' for the base address of the
|
||||
specific high memory region. It's the preparatory work to optimize
|
||||
high memory region address assignment.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-4-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index ca098d40b8..ddcf7ee2f8 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
- hwaddr region_size;
|
||||
+ hwaddr region_base, region_size;
|
||||
bool fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ region_base = ROUND_UP(base, extended_memmap[i].size);
|
||||
region_size = extended_memmap[i].size;
|
||||
|
||||
- base = ROUND_UP(base, region_size);
|
||||
- vms->memmap[i].base = base;
|
||||
+ vms->memmap[i].base = region_base;
|
||||
vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
- fits = (base + region_size) <= BIT_ULL(pa_bits);
|
||||
+ fits = (region_base + region_size) <= BIT_ULL(pa_bits);
|
||||
if (fits) {
|
||||
- vms->highest_gpa = base + region_size - 1;
|
||||
+ vms->highest_gpa = region_base + region_size - 1;
|
||||
}
|
||||
|
||||
switch (i) {
|
||||
@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
break;
|
||||
}
|
||||
|
||||
- base += region_size;
|
||||
+ base = region_base + region_size;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,95 @@
|
||||
From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled()
|
||||
helper
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces virt_get_high_memmap_enabled() helper, which returns
|
||||
the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will
|
||||
be used in the subsequent patches.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-5-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 32 +++++++++++++++++++-------------
|
||||
1 file changed, 19 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index ddcf7ee2f8..6e3b9fc060 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
return arm_cpu_mp_affinity(idx, clustersz);
|
||||
}
|
||||
|
||||
+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
|
||||
+ int index)
|
||||
+{
|
||||
+ bool *enabled_array[] = {
|
||||
+ &vms->highmem_redists,
|
||||
+ &vms->highmem_ecam,
|
||||
+ &vms->highmem_mmio,
|
||||
+ };
|
||||
+
|
||||
+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST ==
|
||||
+ ARRAY_SIZE(enabled_array));
|
||||
+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array));
|
||||
+
|
||||
+ return enabled_array[index - VIRT_LOWMEMMAP_LAST];
|
||||
+}
|
||||
+
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
hwaddr region_base, region_size;
|
||||
- bool fits;
|
||||
+ bool *region_enabled, fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ region_enabled = virt_get_high_memmap_enabled(vms, i);
|
||||
region_base = ROUND_UP(base, extended_memmap[i].size);
|
||||
region_size = extended_memmap[i].size;
|
||||
|
||||
@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
vms->highest_gpa = region_base + region_size - 1;
|
||||
}
|
||||
|
||||
- switch (i) {
|
||||
- case VIRT_HIGH_GIC_REDIST2:
|
||||
- vms->highmem_redists &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_ECAM:
|
||||
- vms->highmem_ecam &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_MMIO:
|
||||
- vms->highmem_mmio &= fits;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
+ *region_enabled &= fits;
|
||||
base = region_base + region_size;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,130 @@
|
||||
From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This introduces virt_set_high_memmap() helper. The logic of high
|
||||
memory region address assignment is moved to the helper. The intention
|
||||
is to make the subsequent optimization for high memory region address
|
||||
assignment easier.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-2-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 74 ++++++++++++++++++++++++++++-----------------------
|
||||
1 file changed, 41 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index bf18838b87..bea5f54720 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
return arm_cpu_mp_affinity(idx, clustersz);
|
||||
}
|
||||
|
||||
+static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
+ hwaddr base, int pa_bits)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
+ hwaddr size = extended_memmap[i].size;
|
||||
+ bool fits;
|
||||
+
|
||||
+ base = ROUND_UP(base, size);
|
||||
+ vms->memmap[i].base = base;
|
||||
+ vms->memmap[i].size = size;
|
||||
+
|
||||
+ /*
|
||||
+ * Check each device to see if they fit in the PA space,
|
||||
+ * moving highest_gpa as we go.
|
||||
+ *
|
||||
+ * For each device that doesn't fit, disable it.
|
||||
+ */
|
||||
+ fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
+ if (fits) {
|
||||
+ vms->highest_gpa = base + size - 1;
|
||||
+ }
|
||||
+
|
||||
+ switch (i) {
|
||||
+ case VIRT_HIGH_GIC_REDIST2:
|
||||
+ vms->highmem_redists &= fits;
|
||||
+ break;
|
||||
+ case VIRT_HIGH_PCIE_ECAM:
|
||||
+ vms->highmem_ecam &= fits;
|
||||
+ break;
|
||||
+ case VIRT_HIGH_PCIE_MMIO:
|
||||
+ vms->highmem_mmio &= fits;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ base += size;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
|
||||
{
|
||||
MachineState *ms = MACHINE(vms);
|
||||
@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
|
||||
/* We know for sure that at least the memory fits in the PA space */
|
||||
vms->highest_gpa = memtop - 1;
|
||||
|
||||
- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
- hwaddr size = extended_memmap[i].size;
|
||||
- bool fits;
|
||||
-
|
||||
- base = ROUND_UP(base, size);
|
||||
- vms->memmap[i].base = base;
|
||||
- vms->memmap[i].size = size;
|
||||
-
|
||||
- /*
|
||||
- * Check each device to see if they fit in the PA space,
|
||||
- * moving highest_gpa as we go.
|
||||
- *
|
||||
- * For each device that doesn't fit, disable it.
|
||||
- */
|
||||
- fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
- if (fits) {
|
||||
- vms->highest_gpa = base + size - 1;
|
||||
- }
|
||||
-
|
||||
- switch (i) {
|
||||
- case VIRT_HIGH_GIC_REDIST2:
|
||||
- vms->highmem_redists &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_ECAM:
|
||||
- vms->highmem_ecam &= fits;
|
||||
- break;
|
||||
- case VIRT_HIGH_PCIE_MMIO:
|
||||
- vms->highmem_mmio &= fits;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- base += size;
|
||||
- }
|
||||
+ virt_set_high_memmap(vms, base, pa_bits);
|
||||
|
||||
if (device_memory_size > 0) {
|
||||
ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,76 +0,0 @@
|
||||
From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Tue, 3 May 2022 08:56:52 +0200
|
||||
Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option
|
||||
RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm)
|
||||
RH-Bugzilla: 2046029
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Andrew Jones <drjones@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161
|
||||
Upstream Status: RHEL-only
|
||||
Tested: Boot RHEL guest and check the option is not available
|
||||
|
||||
In RHEL we do not want to expose the dtb-kaslr-seed virt machine
|
||||
option. Indeed the default 'on' value matches our need as
|
||||
random data in the DTB does not cause any boot failure and we
|
||||
want to support KASLR for the guest.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
|
||||
---
|
||||
---
|
||||
hw/arm/virt.c | 11 +++--------
|
||||
1 file changed, 3 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index e06862d22a..bde4f77994 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
|
||||
vms->its = value;
|
||||
}
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp)
|
||||
{
|
||||
VirtMachineState *vms = VIRT_MACHINE(obj);
|
||||
@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->dtb_kaslr_seed = value;
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
static char *virt_get_oem_id(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Override the default value of field OEM Table ID "
|
||||
"in ACPI table header."
|
||||
"The string may be up to 8 bytes in size");
|
||||
-
|
||||
- object_class_property_add_bool(oc, "dtb-kaslr-seed",
|
||||
- virt_get_dtb_kaslr_seed,
|
||||
- virt_set_dtb_kaslr_seed);
|
||||
- object_class_property_set_description(oc, "dtb-kaslr-seed",
|
||||
- "Set off to disable passing of kaslr-seed "
|
||||
- "dtb node to guest");
|
||||
}
|
||||
|
||||
static void rhel_virt_instance_init(Object *obj)
|
||||
@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj)
|
||||
/* MTE is disabled by default and non-configurable for RHEL */
|
||||
vms->mte = false;
|
||||
|
||||
- /* Supply a kaslr-seed by default */
|
||||
+ /* Supply a kaslr-seed by default and non-configurable for RHEL */
|
||||
vms->dtb_kaslr_seed = true;
|
||||
|
||||
vms->irqmap = a15irqmap;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,83 @@
|
||||
From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Wed, 21 Dec 2022 08:48:45 +0800
|
||||
Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in
|
||||
virt_set_high_memmap()
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
|
||||
RH-Bugzilla: 2113840
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
|
||||
|
||||
This renames variable 'size' to 'region_size' in virt_set_high_memmap().
|
||||
Its counterpart ('region_base') will be introduced in the next patch.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Message-id: 20221029224307.138822-3-gshan@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 15 ++++++++-------
|
||||
1 file changed, 8 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index bea5f54720..ca098d40b8 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
|
||||
static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
hwaddr base, int pa_bits)
|
||||
{
|
||||
+ hwaddr region_size;
|
||||
+ bool fits;
|
||||
int i;
|
||||
|
||||
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
|
||||
- hwaddr size = extended_memmap[i].size;
|
||||
- bool fits;
|
||||
+ region_size = extended_memmap[i].size;
|
||||
|
||||
- base = ROUND_UP(base, size);
|
||||
+ base = ROUND_UP(base, region_size);
|
||||
vms->memmap[i].base = base;
|
||||
- vms->memmap[i].size = size;
|
||||
+ vms->memmap[i].size = region_size;
|
||||
|
||||
/*
|
||||
* Check each device to see if they fit in the PA space,
|
||||
@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
*
|
||||
* For each device that doesn't fit, disable it.
|
||||
*/
|
||||
- fits = (base + size) <= BIT_ULL(pa_bits);
|
||||
+ fits = (base + region_size) <= BIT_ULL(pa_bits);
|
||||
if (fits) {
|
||||
- vms->highest_gpa = base + size - 1;
|
||||
+ vms->highest_gpa = base + region_size - 1;
|
||||
}
|
||||
|
||||
switch (i) {
|
||||
@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
|
||||
break;
|
||||
}
|
||||
|
||||
- base += size;
|
||||
+ base += region_size;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,96 +0,0 @@
|
||||
From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
|
||||
Date: Thu, 18 Nov 2021 12:57:32 +0100
|
||||
Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun
|
||||
(CVE-2021-3507)
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
|
||||
RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm)
|
||||
RH-Bugzilla: 1951522
|
||||
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
Per the 82078 datasheet, if the end-of-track (EOT byte in
|
||||
the FIFO) is more than the number of sectors per side, the
|
||||
command is terminated unsuccessfully:
|
||||
|
||||
* 5.2.5 DATA TRANSFER TERMINATION
|
||||
|
||||
The 82078 supports terminal count explicitly through
|
||||
the TC pin and implicitly through the underrun/over-
|
||||
run and end-of-track (EOT) functions. For full sector
|
||||
transfers, the EOT parameter can define the last
|
||||
sector to be transferred in a single or multisector
|
||||
transfer. If the last sector to be transferred is a par-
|
||||
tial sector, the host can stop transferring the data in
|
||||
mid-sector, and the 82078 will continue to complete
|
||||
the sector as if a hardware TC was received. The
|
||||
only difference between these implicit functions and
|
||||
TC is that they return "abnormal termination" result
|
||||
status. Such status indications can be ignored if they
|
||||
were expected.
|
||||
|
||||
* 6.1.3 READ TRACK
|
||||
|
||||
This command terminates when the EOT specified
|
||||
number of sectors have been read. If the 82078
|
||||
does not find an ID Address Mark on the diskette
|
||||
after the second occurrence of a pulse on the
|
||||
INDX# pin, then it sets the IC code in Status Regis-
|
||||
ter 0 to "01" (Abnormal termination), sets the MA bit
|
||||
in Status Register 1 to "1", and terminates the com-
|
||||
mand.
|
||||
|
||||
* 6.1.6 VERIFY
|
||||
|
||||
Refer to Table 6-6 and Table 6-7 for information
|
||||
concerning the values of MT and EC versus SC and
|
||||
EOT value.
|
||||
|
||||
* Table 6-6. Result Phase Table
|
||||
|
||||
* Table 6-7. Verify Command Result Phase Table
|
||||
|
||||
Fix by aborting the transfer when EOT > # Sectors Per Side.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Cc: Hervé Poussineau <hpoussin@reactos.org>
|
||||
Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
|
||||
Reported-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Message-Id: <20211118115733.4038610-2-philmd@redhat.com>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367)
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/block/fdc.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
|
||||
index ca1776121f..6481ec0cfb 100644
|
||||
--- a/hw/block/fdc.c
|
||||
+++ b/hw/block/fdc.c
|
||||
@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
|
||||
int tmp;
|
||||
fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
|
||||
tmp = (fdctrl->fifo[6] - ks + 1);
|
||||
+ if (tmp < 0) {
|
||||
+ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
|
||||
+ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
|
||||
+ fdctrl->fifo[3] = kt;
|
||||
+ fdctrl->fifo[4] = kh;
|
||||
+ fdctrl->fifo[5] = ks;
|
||||
+ return;
|
||||
+ }
|
||||
if (fdctrl->fifo[0] & 0x80)
|
||||
tmp += fdctrl->fifo[6];
|
||||
fdctrl->data_len *= tmp;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,59 @@
|
||||
From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001
|
||||
From: Julia Suvorova <jusual@redhat.com>
|
||||
Date: Thu, 23 Feb 2023 13:57:47 +0100
|
||||
Subject: [PATCH] hw/smbios: fix field corruption in type 4 table
|
||||
|
||||
RH-Author: Julia Suvorova <None>
|
||||
RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table
|
||||
RH-Bugzilla: 2169904
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec
|
||||
|
||||
Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the
|
||||
strings which follow immediately after the struct fields have been
|
||||
overwritten by unconditional filling of later fields such as core_count2.
|
||||
Make these fields dependent on the SMBIOS version.
|
||||
|
||||
Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4")
|
||||
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904
|
||||
|
||||
Signed-off-by: Julia Suvorova <jusual@redhat.com>
|
||||
Message-Id: <20230223125747.254914-1-jusual@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Ani Sinha <ani@anisinha.ca>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b)
|
||||
---
|
||||
hw/smbios/smbios.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index c5ad69237e..2d2ece3edb 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
|
||||
t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
|
||||
t->core_enabled = t->core_count;
|
||||
|
||||
- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
-
|
||||
t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
|
||||
- t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
|
||||
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
|
||||
t->processor_family2 = cpu_to_le16(0x01); /* Other */
|
||||
|
||||
+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) {
|
||||
+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
+ t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
+ }
|
||||
+
|
||||
SMBIOS_BUILD_TABLE_POST;
|
||||
smbios_type4_count++;
|
||||
}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,95 +0,0 @@
|
||||
From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
|
||||
Date: Thu, 21 Jul 2022 15:27:35 +0200
|
||||
Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eugenio Pérez <eperezma@redhat.com>
|
||||
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
|
||||
RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm)
|
||||
RH-Bugzilla: 1939363
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
||||
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/1939363
|
||||
|
||||
Upstream Status: git://git.qemu.org/qemu.git
|
||||
|
||||
commit d792199de55ca5cb5334016884039c740290b5c7
|
||||
Author: Philippe Mathieu-Daudé <f4bug@amsat.org>
|
||||
Date: Thu May 12 19:57:46 2022 +0200
|
||||
|
||||
hw/virtio: Replace g_memdup() by g_memdup2()
|
||||
|
||||
Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538
|
||||
|
||||
The old API took the size of the memory to duplicate as a guint,
|
||||
whereas most memory functions take memory sizes as a gsize. This
|
||||
made it easy to accidentally pass a gsize to g_memdup(). For large
|
||||
values, that would lead to a silent truncation of the size from 64
|
||||
to 32 bits, and result in a heap area being returned which is
|
||||
significantly smaller than what the caller expects. This can likely
|
||||
be exploited in various modules to cause a heap buffer overflow.
|
||||
|
||||
Replace g_memdup() by the safer g_memdup2() wrapper.
|
||||
|
||||
Acked-by: Jason Wang <jasowang@redhat.com>
|
||||
Acked-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Message-Id: <20220512175747.142058-6-eperezma@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
|
||||
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
|
||||
---
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/virtio/virtio-crypto.c | 6 +++---
|
||||
2 files changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 099e65036d..633de61513 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
|
||||
}
|
||||
|
||||
iov_cnt = elem->out_num;
|
||||
- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
|
||||
+ iov2 = iov = g_memdup2(elem->out_sg,
|
||||
+ sizeof(struct iovec) * elem->out_num);
|
||||
s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
|
||||
iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
|
||||
if (s != sizeof(ctrl)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index dcd80b904d..0e31e3cc04 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
|
||||
}
|
||||
|
||||
out_num = elem->out_num;
|
||||
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
|
||||
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
|
||||
out_iov = out_iov_copy;
|
||||
|
||||
in_num = elem->in_num;
|
||||
@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
|
||||
}
|
||||
|
||||
out_num = elem->out_num;
|
||||
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
|
||||
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
|
||||
out_iov = out_iov_copy;
|
||||
|
||||
in_num = elem->in_num;
|
||||
- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num);
|
||||
+ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
|
||||
in_iov = in_iov_copy;
|
||||
|
||||
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,68 +0,0 @@
|
||||
From 8452a7925e18d6d57e2ac787b192097d4136b104 Mon Sep 17 00:00:00 2001
|
||||
From: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
Date: Thu, 18 Aug 2022 17:01:13 +0200
|
||||
Subject: [PATCH 2/2] i386: do kvm_put_msr_feature_control() first thing when
|
||||
vCPU is reset
|
||||
|
||||
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation
|
||||
RH-Bugzilla: 2117546
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [2/2] 08d5992691ba70561ce0a5b7f4504618f96a2ee6
|
||||
|
||||
kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when
|
||||
it is in VMX root operation. Do kvm_put_msr_feature_control() before
|
||||
kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also
|
||||
seems logical to do kvm_put_msr_feature_control() before
|
||||
kvm_put_nested_state() and not after it, especially when 'real' nested
|
||||
state is set.
|
||||
|
||||
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
Message-Id: <20220818150113.479917-3-vkuznets@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c)
|
||||
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 17 ++++++++++++-----
|
||||
1 file changed, 12 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 9feb98fe0b..ef70e2c85f 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -4356,6 +4356,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
|
||||
|
||||
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
|
||||
|
||||
+ /*
|
||||
+ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
|
||||
+ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
|
||||
+ * preceed kvm_put_nested_state() when 'real' nested state is set.
|
||||
+ */
|
||||
+ if (level >= KVM_PUT_RESET_STATE) {
|
||||
+ ret = kvm_put_msr_feature_control(x86_cpu);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* must be before kvm_put_nested_state so that EFER.SVME is set */
|
||||
ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu);
|
||||
if (ret < 0) {
|
||||
@@ -4367,11 +4379,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
-
|
||||
- ret = kvm_put_msr_feature_control(x86_cpu);
|
||||
- if (ret < 0) {
|
||||
- return ret;
|
||||
- }
|
||||
}
|
||||
|
||||
if (level == KVM_PUT_FULL_STATE) {
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,95 +0,0 @@
|
||||
From b84bb71165c97b475548edc1c07decccca53cf16 Mon Sep 17 00:00:00 2001
|
||||
From: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
Date: Thu, 18 Aug 2022 17:01:12 +0200
|
||||
Subject: [PATCH 1/2] i386: reset KVM nested state upon CPU reset
|
||||
|
||||
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation
|
||||
RH-Bugzilla: 2117546
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [1/2] b329f053a027761f50187e4ca7fd6b50ac32d2ad
|
||||
|
||||
Make sure env->nested_state is cleaned up when a vCPU is reset: it may
|
||||
be stale after an incoming migration, and kvm_arch_put_registers() may
|
||||
then end up failing or putting the vCPU in a weird state.
|
||||
|
||||
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
|
||||
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
Message-Id: <20220818150113.479917-2-vkuznets@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b)
|
||||
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++----------
|
||||
1 file changed, 27 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 6d1e009443..9feb98fe0b 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -1617,6 +1617,30 @@ static void kvm_init_xsave(CPUX86State *env)
|
||||
env->xsave_buf_len);
|
||||
}
|
||||
|
||||
+static void kvm_init_nested_state(CPUX86State *env)
|
||||
+{
|
||||
+ struct kvm_vmx_nested_state_hdr *vmx_hdr;
|
||||
+ uint32_t size;
|
||||
+
|
||||
+ if (!env->nested_state) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ size = env->nested_state->size;
|
||||
+
|
||||
+ memset(env->nested_state, 0, size);
|
||||
+ env->nested_state->size = size;
|
||||
+
|
||||
+ if (cpu_has_vmx(env)) {
|
||||
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
|
||||
+ vmx_hdr = &env->nested_state->hdr.vmx;
|
||||
+ vmx_hdr->vmxon_pa = -1ull;
|
||||
+ vmx_hdr->vmcs12_pa = -1ull;
|
||||
+ } else if (cpu_has_svm(env)) {
|
||||
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int kvm_arch_init_vcpu(CPUState *cs)
|
||||
{
|
||||
struct {
|
||||
@@ -2044,19 +2068,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
|
||||
|
||||
if (cpu_has_vmx(env) || cpu_has_svm(env)) {
|
||||
- struct kvm_vmx_nested_state_hdr *vmx_hdr;
|
||||
-
|
||||
env->nested_state = g_malloc0(max_nested_state_len);
|
||||
env->nested_state->size = max_nested_state_len;
|
||||
|
||||
- if (cpu_has_vmx(env)) {
|
||||
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
|
||||
- vmx_hdr = &env->nested_state->hdr.vmx;
|
||||
- vmx_hdr->vmxon_pa = -1ull;
|
||||
- vmx_hdr->vmcs12_pa = -1ull;
|
||||
- } else {
|
||||
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
|
||||
- }
|
||||
+ kvm_init_nested_state(env);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2121,6 +2136,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
|
||||
/* enabled by default */
|
||||
env->poll_control_msr = 1;
|
||||
|
||||
+ kvm_init_nested_state(env);
|
||||
+
|
||||
sev_es_set_reset_vector(CPU(cpu));
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,64 @@
|
||||
From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Thu, 23 Feb 2023 14:59:21 +0800
|
||||
Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode
|
||||
|
||||
RH-Author: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode
|
||||
RH-Bugzilla: 2156876
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos)
|
||||
|
||||
Without dt mode, device IOTLB notifier won't work since guest won't
|
||||
send device IOTLB invalidation descriptor in this case. Let's fail
|
||||
early instead of misbehaving silently.
|
||||
|
||||
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
|
||||
Tested-by: Laurent Vivier <lvivier@redhat.com>
|
||||
Tested-by: Viktor Prutyanov <viktor@daynix.com>
|
||||
Buglink: https://bugzilla.redhat.com/2156876
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Message-Id: <20230223065924.42503-3-jasowang@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3)
|
||||
|
||||
Conflict in hw/i386/intel_iommu.c because of missing commit:
|
||||
|
||||
4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode")
|
||||
---
|
||||
hw/i386/intel_iommu.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
||||
index a08ee85edf..d2983f40d3 100644
|
||||
--- a/hw/i386/intel_iommu.c
|
||||
+++ b/hw/i386/intel_iommu.c
|
||||
@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
|
||||
{
|
||||
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
|
||||
IntelIOMMUState *s = vtd_as->iommu_state;
|
||||
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
|
||||
|
||||
/* TODO: add support for VFIO and vhost users */
|
||||
if (s->snoop_control) {
|
||||
@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
|
||||
"Snoop Control with vhost or VFIO is not supported");
|
||||
return -ENOTSUP;
|
||||
}
|
||||
+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
|
||||
+ error_setg_errno(errp, ENOTSUP,
|
||||
+ "device %02x.%02x.%x requires device IOTLB mode",
|
||||
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
|
||||
+ PCI_FUNC(vtd_as->devfn));
|
||||
+ return -ENOTSUP;
|
||||
+ }
|
||||
|
||||
/* Update per-address-space notifier flags */
|
||||
vtd_as->notifier_flags = new;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,386 @@
|
||||
From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001
|
||||
From: "manish.mishra" <manish.mishra@nutanix.com>
|
||||
Date: Tue, 20 Dec 2022 18:44:17 +0000
|
||||
Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
|
||||
RH-Bugzilla: 2169732
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm)
|
||||
|
||||
MSG_PEEK peeks at the channel: the data is treated as unread and
|
||||
the next read shall still return this data. This support is
|
||||
currently added only for socket class. Extra parameter 'flags'
|
||||
is added to io_readv calls to pass extra read flags like MSG_PEEK.
|
||||
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3)
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
chardev/char-socket.c | 4 ++--
|
||||
include/io/channel.h | 6 ++++++
|
||||
io/channel-buffer.c | 1 +
|
||||
io/channel-command.c | 1 +
|
||||
io/channel-file.c | 1 +
|
||||
io/channel-null.c | 1 +
|
||||
io/channel-socket.c | 19 ++++++++++++++++++-
|
||||
io/channel-tls.c | 1 +
|
||||
io/channel-websock.c | 1 +
|
||||
io/channel.c | 16 ++++++++++++----
|
||||
migration/channel-block.c | 1 +
|
||||
migration/rdma.c | 1 +
|
||||
scsi/qemu-pr-helper.c | 2 +-
|
||||
tests/qtest/tpm-emu.c | 2 +-
|
||||
tests/unit/test-io-channel-socket.c | 1 +
|
||||
util/vhost-user-server.c | 2 +-
|
||||
16 files changed, 50 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 879564aa8a..5afce9a464 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
|
||||
if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
|
||||
ret = qio_channel_readv_full(s->ioc, &iov, 1,
|
||||
&msgfds, &msgfds_num,
|
||||
- NULL);
|
||||
+ 0, NULL);
|
||||
} else {
|
||||
ret = qio_channel_readv_full(s->ioc, &iov, 1,
|
||||
NULL, NULL,
|
||||
- NULL);
|
||||
+ 0, NULL);
|
||||
}
|
||||
|
||||
if (msgfds_num) {
|
||||
diff --git a/include/io/channel.h b/include/io/channel.h
|
||||
index c680ee7480..716235d496 100644
|
||||
--- a/include/io/channel.h
|
||||
+++ b/include/io/channel.h
|
||||
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
|
||||
|
||||
#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
|
||||
|
||||
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
|
||||
+
|
||||
typedef enum QIOChannelFeature QIOChannelFeature;
|
||||
|
||||
enum QIOChannelFeature {
|
||||
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
|
||||
QIO_CHANNEL_FEATURE_SHUTDOWN,
|
||||
QIO_CHANNEL_FEATURE_LISTEN,
|
||||
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
|
||||
};
|
||||
|
||||
|
||||
@@ -114,6 +117,7 @@ struct QIOChannelClass {
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
int (*io_close)(QIOChannel *ioc,
|
||||
Error **errp);
|
||||
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
|
||||
* @niov: the length of the @iov array
|
||||
* @fds: pointer to an array that will received file handles
|
||||
* @nfds: pointer filled with number of elements in @fds on return
|
||||
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
* Read data from the IO channel, storing it in the
|
||||
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
|
||||
|
||||
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
|
||||
index bf52011be2..8096180f85 100644
|
||||
--- a/io/channel-buffer.c
|
||||
+++ b/io/channel-buffer.c
|
||||
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
|
||||
diff --git a/io/channel-command.c b/io/channel-command.c
|
||||
index 74516252ba..e7edd091af 100644
|
||||
--- a/io/channel-command.c
|
||||
+++ b/io/channel-command.c
|
||||
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
|
||||
diff --git a/io/channel-file.c b/io/channel-file.c
|
||||
index b67687c2aa..d76663e6ae 100644
|
||||
--- a/io/channel-file.c
|
||||
+++ b/io/channel-file.c
|
||||
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
|
||||
diff --git a/io/channel-null.c b/io/channel-null.c
|
||||
index 75e3781507..4fafdb770d 100644
|
||||
--- a/io/channel-null.c
|
||||
+++ b/io/channel-null.c
|
||||
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds G_GNUC_UNUSED,
|
||||
size_t *nfds G_GNUC_UNUSED,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index b76dca9cc1..7aca84f61a 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
|
||||
}
|
||||
#endif
|
||||
|
||||
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
+ qio_channel_set_feature(QIO_CHANNEL(cioc),
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
|
||||
+
|
||||
trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
|
||||
return cioc;
|
||||
|
||||
@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
|
||||
}
|
||||
|
||||
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
|
||||
+ sflags |= MSG_PEEK;
|
||||
+ }
|
||||
+
|
||||
retry:
|
||||
ret = recvmsg(sioc->fd, &msg, sflags);
|
||||
if (ret < 0) {
|
||||
@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
ssize_t done = 0;
|
||||
ssize_t i;
|
||||
+ int sflags = 0;
|
||||
+
|
||||
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
|
||||
+ sflags |= MSG_PEEK;
|
||||
+ }
|
||||
|
||||
for (i = 0; i < niov; i++) {
|
||||
ssize_t ret;
|
||||
@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
ret = recv(sioc->fd,
|
||||
iov[i].iov_base,
|
||||
iov[i].iov_len,
|
||||
- 0);
|
||||
+ sflags);
|
||||
if (ret < 0) {
|
||||
if (errno == EAGAIN) {
|
||||
if (done) {
|
||||
diff --git a/io/channel-tls.c b/io/channel-tls.c
|
||||
index 4ce890a538..c730cb8ec5 100644
|
||||
--- a/io/channel-tls.c
|
||||
+++ b/io/channel-tls.c
|
||||
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
|
||||
diff --git a/io/channel-websock.c b/io/channel-websock.c
|
||||
index fb4932ade7..a12acc27cf 100644
|
||||
--- a/io/channel-websock.c
|
||||
+++ b/io/channel-websock.c
|
||||
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
|
||||
diff --git a/io/channel.c b/io/channel.c
|
||||
index 0640941ac5..a8c7f11649 100644
|
||||
--- a/io/channel.c
|
||||
+++ b/io/channel.c
|
||||
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
|
||||
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
return -1;
|
||||
}
|
||||
|
||||
- return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
|
||||
+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
|
||||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
|
||||
+ error_setg_errno(errp, EINVAL,
|
||||
+ "Channel does not support peek read");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
|
||||
while ((nlocal_iov > 0) || local_fds) {
|
||||
ssize_t len;
|
||||
len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
|
||||
- local_nfds, errp);
|
||||
+ local_nfds, 0, errp);
|
||||
if (len == QIO_CHANNEL_ERR_BLOCK) {
|
||||
if (qemu_in_coroutine()) {
|
||||
qio_channel_yield(ioc, G_IO_IN);
|
||||
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
Error **errp)
|
||||
{
|
||||
- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
|
||||
+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
|
||||
Error **errp)
|
||||
{
|
||||
struct iovec iov = { .iov_base = buf, .iov_len = buflen };
|
||||
- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
|
||||
+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
diff --git a/migration/channel-block.c b/migration/channel-block.c
|
||||
index f4ab53acdb..b7374363c3 100644
|
||||
--- a/migration/channel-block.c
|
||||
+++ b/migration/channel-block.c
|
||||
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
|
||||
diff --git a/migration/rdma.c b/migration/rdma.c
|
||||
index 94a55dd95b..d8b4632094 100644
|
||||
--- a/migration/rdma.c
|
||||
+++ b/migration/rdma.c
|
||||
@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
|
||||
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
|
||||
index 196b78c00d..199227a556 100644
|
||||
--- a/scsi/qemu-pr-helper.c
|
||||
+++ b/scsi/qemu-pr-helper.c
|
||||
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = sz;
|
||||
n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
|
||||
- &fds, &nfds, errp);
|
||||
+ &fds, &nfds, 0, errp);
|
||||
|
||||
if (n_read == QIO_CHANNEL_ERR_BLOCK) {
|
||||
qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
|
||||
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
|
||||
index 2994d1cf42..3cf1acaf7d 100644
|
||||
--- a/tests/qtest/tpm-emu.c
|
||||
+++ b/tests/qtest/tpm-emu.c
|
||||
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
|
||||
int *pfd = NULL;
|
||||
size_t nfd = 0;
|
||||
|
||||
- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
|
||||
+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
|
||||
cmd = be32_to_cpu(cmd);
|
||||
g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
|
||||
g_assert_cmpint(nfd, ==, 1);
|
||||
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
|
||||
index b36a5d972a..b964bb202d 100644
|
||||
--- a/tests/unit/test-io-channel-socket.c
|
||||
+++ b/tests/unit/test-io-channel-socket.c
|
||||
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
|
||||
G_N_ELEMENTS(iorecv),
|
||||
&fdrecv,
|
||||
&nfdrecv,
|
||||
+ 0,
|
||||
&error_abort);
|
||||
|
||||
g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
|
||||
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
|
||||
index 232984ace6..145eb17c08 100644
|
||||
--- a/util/vhost-user-server.c
|
||||
+++ b/util/vhost-user-server.c
|
||||
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
|
||||
* qio_channel_readv_full may have short reads, keeping calling it
|
||||
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
|
||||
*/
|
||||
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
|
||||
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
|
||||
if (rc < 0) {
|
||||
if (rc == QIO_CHANNEL_ERR_BLOCK) {
|
||||
assert(local_err == NULL);
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,99 @@
|
||||
From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Mon, 20 Jun 2022 18:27:02 +0200
|
||||
Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
|
||||
RH-Bugzilla: 1860292
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
These tests read size information (sometimes disk size, sometimes
|
||||
virtual size) from qemu-img info's output. Once qemu-img starts
|
||||
printing info about child nodes, we are going to see multiple instances
|
||||
of that per image, but these tests are only interested in the first one,
|
||||
so use "head -n 1" to get it.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220620162704.80987-11-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/106 | 4 ++--
|
||||
tests/qemu-iotests/214 | 6 ++++--
|
||||
tests/qemu-iotests/308 | 4 ++--
|
||||
3 files changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106
|
||||
index 9d6adb542d..ae0fc46691 100755
|
||||
--- a/tests/qemu-iotests/106
|
||||
+++ b/tests/qemu-iotests/106
|
||||
@@ -66,7 +66,7 @@ for create_mode in off falloc full; do
|
||||
expected_size=$((expected_size + $GROWTH_SIZE))
|
||||
fi
|
||||
|
||||
- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size')
|
||||
+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1)
|
||||
actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/')
|
||||
|
||||
# The actual size may exceed the expected size, depending on the file
|
||||
@@ -105,7 +105,7 @@ for growth_mode in falloc full; do
|
||||
_make_test_img -o "extent_size_hint=0" 2G
|
||||
$QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K
|
||||
|
||||
- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size')
|
||||
+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1)
|
||||
actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/')
|
||||
|
||||
if [ $actual_size -lt $GROWTH_SIZE ]; then
|
||||
diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
|
||||
index c66e246ba2..55ffcd7f44 100755
|
||||
--- a/tests/qemu-iotests/214
|
||||
+++ b/tests/qemu-iotests/214
|
||||
@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size"
|
||||
$QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \
|
||||
2>&1 | _filter_qemu_io | _filter_testdir
|
||||
sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" |
|
||||
- sed -n '/"actual-size":/ s/[^0-9]//gp')
|
||||
+ sed -n '/"actual-size":/ s/[^0-9]//gp' |
|
||||
+ head -n 1)
|
||||
|
||||
_make_test_img 2M -o cluster_size=$cluster_size
|
||||
echo "Write compressed data:"
|
||||
@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\
|
||||
_filter_qemu_io | _filter_testdir
|
||||
|
||||
sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" |
|
||||
- sed -n '/"actual-size":/ s/[^0-9]//gp')
|
||||
+ sed -n '/"actual-size":/ s/[^0-9]//gp' |
|
||||
+ head -n 1)
|
||||
|
||||
if [ $sizeA -lt $sizeB ]
|
||||
then
|
||||
diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
|
||||
index bde4aac2fa..09275e9a10 100755
|
||||
--- a/tests/qemu-iotests/308
|
||||
+++ b/tests/qemu-iotests/308
|
||||
@@ -217,12 +217,12 @@ echo
|
||||
echo '=== Remove export ==='
|
||||
|
||||
# Double-check that $EXT_MP appears as a non-empty file (the raw image)
|
||||
-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size'
|
||||
+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1
|
||||
|
||||
fuse_export_del 'export-mp'
|
||||
|
||||
# See that the file appears empty again
|
||||
-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size'
|
||||
+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1
|
||||
|
||||
echo
|
||||
echo '=== Writable export ==='
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,52 +0,0 @@
|
||||
From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Thu, 21 Apr 2022 16:24:35 +0200
|
||||
Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other
|
||||
|
||||
RH-Author: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
|
||||
RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s)
|
||||
RH-Bugzilla: 2072379
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
FUSE exports' allow-other option defaults to "auto", which means that it
|
||||
will try passing allow_other as a mount option, and fall back to not
|
||||
using it when an error occurs. We make no effort to hide fusermount's
|
||||
error message (because it would be difficult, and because users might
|
||||
want to know about the fallback occurring), and so when allow_other does
|
||||
not work (primarily when /etc/fuse.conf does not contain
|
||||
user_allow_other), this error message will appear and break the
|
||||
reference output.
|
||||
|
||||
We do not need allow_other here, though, so we can just pass
|
||||
allow-other=off to fix that.
|
||||
|
||||
Reported-by: Markus Armbruster <armbru@redhat.com>
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220421142435.569600-1-hreitz@redhat.com>
|
||||
Tested-by: Markus Armbruster <armbru@redhat.com>
|
||||
Tested-by: Eric Blake <eblake@redhat.com>
|
||||
(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba)
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/108 | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
|
||||
index a3090e2875..4681c7c769 100755
|
||||
--- a/tests/qemu-iotests/108
|
||||
+++ b/tests/qemu-iotests/108
|
||||
@@ -326,7 +326,7 @@ else
|
||||
|
||||
$QSD \
|
||||
--blockdev file,node-name=export-node,filename="$TEST_IMG" \
|
||||
- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
|
||||
+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \
|
||||
--pidfile "$TEST_DIR/qsd.pid" \
|
||||
&
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
@ -1,445 +0,0 @@
|
||||
From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Tue, 5 Apr 2022 15:46:51 +0200
|
||||
Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm
|
||||
|
||||
RH-Author: Hanna Reitz <hreitz@redhat.com>
|
||||
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
|
||||
RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s)
|
||||
RH-Bugzilla: 2072379
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
One clear problem with how qcow2's refcount structure rebuild algorithm
|
||||
used to be before "qcow2: Improve refcount structure rebuilding" was
|
||||
that it is prone to failure for qcow2 images on block devices: There is
|
||||
generally unused space after the actual image, and if that exceeds what
|
||||
one refblock covers, the old algorithm would invariably write the
|
||||
reftable past the block device's end, which cannot work. The new
|
||||
algorithm does not have this problem.
|
||||
|
||||
Test it with three tests:
|
||||
(1) Create an image with more empty space at the end than what one
|
||||
refblock covers, see whether rebuilding the refcount structures
|
||||
results in a change in the image file length. (It should not.)
|
||||
|
||||
(2) Leave precisely enough space somewhere at the beginning of the image
|
||||
for the new reftable (and the refblock for that place), see whether
|
||||
the new algorithm puts the reftable there. (It should.)
|
||||
|
||||
(3) Test the original problem: Create (something like) a block device
|
||||
with a fixed size, then create a qcow2 image in there, write some
|
||||
data, and then have qemu-img check rebuild the refcount structures.
|
||||
Before HEAD^, the reftable would have been written past the image
|
||||
file end, i.e. outside of what the block device provides, which
|
||||
cannot work. HEAD^ should have fixed that.
|
||||
("Something like a block device" means a loop device if we can use
|
||||
one ("sudo -n losetup" works), or a FUSE block export with
|
||||
growable=false otherwise.)
|
||||
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220405134652.19278-3-hreitz@redhat.com>
|
||||
(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162)
|
||||
|
||||
Conflicts:
|
||||
- 108: The downstream qemu-storage-daemon does not support --daemonize,
|
||||
so this switch has been replaced by a loop waiting for the PID file to
|
||||
appear
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++-
|
||||
tests/qemu-iotests/108.out | 81 ++++++++++++
|
||||
2 files changed, 343 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
|
||||
index 56339ab2c5..a3090e2875 100755
|
||||
--- a/tests/qemu-iotests/108
|
||||
+++ b/tests/qemu-iotests/108
|
||||
@@ -30,13 +30,20 @@ status=1 # failure is the default!
|
||||
|
||||
_cleanup()
|
||||
{
|
||||
- _cleanup_test_img
|
||||
+ _cleanup_test_img
|
||||
+ if [ -f "$TEST_DIR/qsd.pid" ]; then
|
||||
+ qsd_pid=$(cat "$TEST_DIR/qsd.pid")
|
||||
+ kill -KILL "$qsd_pid"
|
||||
+ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
|
||||
+ fi
|
||||
+ rm -f "$TEST_DIR/fuse-export"
|
||||
}
|
||||
trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
|
||||
# get standard environment, filters and checks
|
||||
. ./common.rc
|
||||
. ./common.filter
|
||||
+. ./common.qemu
|
||||
|
||||
# This tests qcow2-specific low-level functionality
|
||||
_supported_fmt qcow2
|
||||
@@ -47,6 +54,22 @@ _supported_os Linux
|
||||
# files
|
||||
_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file
|
||||
|
||||
+# This test either needs sudo -n losetup or FUSE exports to work
|
||||
+if sudo -n losetup &>/dev/null; then
|
||||
+ loopdev=true
|
||||
+else
|
||||
+ loopdev=false
|
||||
+
|
||||
+ # QSD --export fuse will either yield "Parameter 'id' is missing"
|
||||
+ # or "Invalid parameter 'fuse'", depending on whether there is
|
||||
+ # FUSE support or not.
|
||||
+ error=$($QSD --export fuse 2>&1)
|
||||
+ if [[ $error = *"'fuse'"* ]]; then
|
||||
+ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
|
||||
+ 'neither is available'
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
echo
|
||||
echo '=== Repairing an image without any refcount table ==='
|
||||
echo
|
||||
@@ -138,6 +161,244 @@ _make_test_img 64M
|
||||
poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
|
||||
_check_test_img -r all
|
||||
|
||||
+echo
|
||||
+echo '=== Check rebuilt reftable location ==='
|
||||
+
|
||||
+# In an earlier version of the refcount rebuild algorithm, the
|
||||
+# reftable was generally placed at the image end (unless something was
|
||||
+# allocated in the area covered by the refblock right before the image
|
||||
+# file end, then we would try to place the reftable in that refblock).
|
||||
+# This was later changed so the reftable would be placed in the
|
||||
+# earliest possible location. Test this.
|
||||
+
|
||||
+echo
|
||||
+echo '--- Does the image size increase? ---'
|
||||
+echo
|
||||
+
|
||||
+# First test: Just create some image, write some data to it, and
|
||||
+# resize it so there is free space at the end of the image (enough
|
||||
+# that it spans at least one full refblock, which for cluster_size=512
|
||||
+# images, spans 128k). With the old algorithm, the reftable would
|
||||
+# have then been placed at the end of the image file, but with the new
|
||||
+# one, it will be put in that free space.
|
||||
+# We want to check whether the size of the image file increases due to
|
||||
+# rebuilding the refcount structures (it should not).
|
||||
+
|
||||
+_make_test_img -o 'cluster_size=512' 1M
|
||||
+# Write something
|
||||
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+# Add free space
|
||||
+file_len=$(stat -c '%s' "$TEST_IMG")
|
||||
+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
|
||||
+
|
||||
+# Corrupt the image by saying the image header was not allocated
|
||||
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
|
||||
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
|
||||
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
|
||||
+
|
||||
+# Check whether rebuilding the refcount structures increases the image
|
||||
+# file size
|
||||
+file_len=$(stat -c '%s' "$TEST_IMG")
|
||||
+echo
|
||||
+# The only leaks there can be are the old refcount structures that are
|
||||
+# leaked during rebuilding, no need to clutter the output with them
|
||||
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
|
||||
+echo
|
||||
+post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
|
||||
+
|
||||
+if [[ $file_len -eq $post_repair_file_len ]]; then
|
||||
+ echo 'OK: Image size did not change'
|
||||
+else
|
||||
+ echo 'ERROR: Image size differs' \
|
||||
+ "($file_len before, $post_repair_file_len after)"
|
||||
+fi
|
||||
+
|
||||
+echo
|
||||
+echo '--- Will the reftable occupy a hole specifically left for it? ---'
|
||||
+echo
|
||||
+
|
||||
+# Note: With cluster_size=512, every refblock covers 128k.
|
||||
+# The reftable covers 8M per reftable cluster.
|
||||
+
|
||||
+# Create an image that requires two reftable clusters (just because
|
||||
+# this is more interesting than a single-clustered reftable).
|
||||
+_make_test_img -o 'cluster_size=512' 9M
|
||||
+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+# Writing 8M will have resized the reftable. Unfortunately, doing so
|
||||
+# will leave holes in the file, so we need to fill them up so we can
|
||||
+# be sure the whole file is allocated. Do that by writing
|
||||
+# consecutively smaller chunks starting from 8 MB, until the file
|
||||
+# length increases even with a chunk size of 512. Then we must have
|
||||
+# filled all holes.
|
||||
+ofs=$((8 * 1024 * 1024))
|
||||
+block_len=$((16 * 1024))
|
||||
+while [[ $block_len -ge 512 ]]; do
|
||||
+ file_len=$(stat -c '%s' "$TEST_IMG")
|
||||
+ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
|
||||
+ # Do not include this in the reference output, it does not
|
||||
+ # really matter which qemu-io calls we do here exactly
|
||||
+ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
|
||||
+ ofs=$((ofs + block_len))
|
||||
+ done
|
||||
+ block_len=$((block_len / 2))
|
||||
+done
|
||||
+
|
||||
+# Fill up to 9M (do not include this in the reference output either,
|
||||
+# $ofs is random for all we know)
|
||||
+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
|
||||
+
|
||||
+# Make space as follows:
|
||||
+# - For the first refblock: Right at the beginning of the image (this
|
||||
+# refblock is placed in the first place possible),
|
||||
+# - For the reftable somewhere soon afterwards, still near the
|
||||
+# beginning of the image (i.e. covered by the first refblock); the
|
||||
+# reftable too is placed in the first place possible, but only after
|
||||
+# all refblocks have been placed)
|
||||
+# No space is needed for the other refblocks, because no refblock is
|
||||
+# put before the space it covers. In this test case, we do not mind
|
||||
+# if they are placed at the image file's end.
|
||||
+
|
||||
+# Before we make that space, we have to find out the host offset of
|
||||
+# the area that belonged to the two data clusters at guest offset 4k,
|
||||
+# because we expect the reftable to be placed there, and we will have
|
||||
+# to verify that it is.
|
||||
+
|
||||
+l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
|
||||
+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
|
||||
+l2_offset=$((l2_offset & 0x00fffffffffffe00))
|
||||
+data_4k_offset=$(peek_file_be "$TEST_IMG" \
|
||||
+ $((l2_offset + 4096 / 512 * 8)) 8)
|
||||
+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
|
||||
+
|
||||
+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+# Corrupt the image by saying the image header was not allocated
|
||||
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
|
||||
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
|
||||
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
|
||||
+
|
||||
+echo
|
||||
+# The only leaks there can be are the old refcount structures that are
|
||||
+# leaked during rebuilding, no need to clutter the output with them
|
||||
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
|
||||
+echo
|
||||
+
|
||||
+# Check whether the reftable was put where we expected
|
||||
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
|
||||
+if [[ $rt_offset -eq $data_4k_offset ]]; then
|
||||
+ echo 'OK: Reftable is where we expect it'
|
||||
+else
|
||||
+ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
|
||||
+fi
|
||||
+
|
||||
+echo
|
||||
+echo '--- Rebuilding refcount structures on block devices ---'
|
||||
+echo
|
||||
+
|
||||
+# A block device cannot really grow, at least not during qemu-img
|
||||
+# check. As mentioned in the above cases, rebuilding the refcount
|
||||
+# structure may lead to new refcount structures being written after
|
||||
+# the end of the image, and in the past that happened even if there
|
||||
+# was more than sufficient space in the image. Such post-EOF writes
|
||||
+# will not work on block devices, so test that the new algorithm
|
||||
+# avoids it.
|
||||
+
|
||||
+# If we have passwordless sudo and losetup, we can use those to create
|
||||
+# a block device. Otherwise, we can resort to qemu's FUSE export to
|
||||
+# create a file that isn't growable, which effectively tests the same
|
||||
+# thing.
|
||||
+
|
||||
+_cleanup_test_img
|
||||
+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
|
||||
+
|
||||
+if $loopdev; then
|
||||
+ export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
|
||||
+ export_mp_driver=host_device
|
||||
+ sudo -n chmod go+rw "$export_mp"
|
||||
+else
|
||||
+ # Create non-growable FUSE export that is a bit like an empty
|
||||
+ # block device
|
||||
+ export_mp="$TEST_DIR/fuse-export"
|
||||
+ export_mp_driver=file
|
||||
+ touch "$export_mp"
|
||||
+
|
||||
+ $QSD \
|
||||
+ --blockdev file,node-name=export-node,filename="$TEST_IMG" \
|
||||
+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
|
||||
+ --pidfile "$TEST_DIR/qsd.pid" \
|
||||
+ &
|
||||
+
|
||||
+ while [ ! -f "$TEST_DIR/qsd.pid" ]; do
|
||||
+ sleep 0.1
|
||||
+ done
|
||||
+fi
|
||||
+
|
||||
+# Now create a qcow2 image on the device -- unfortunately, qemu-img
|
||||
+# create force-creates the file, so we have to resort to the
|
||||
+# blockdev-create job.
|
||||
+_launch_qemu \
|
||||
+ --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
|
||||
+
|
||||
+_send_qemu_cmd \
|
||||
+ $QEMU_HANDLE \
|
||||
+ '{ "execute": "qmp_capabilities" }' \
|
||||
+ 'return'
|
||||
+
|
||||
+# Small cluster size again, so the image needs multiple refblocks
|
||||
+_send_qemu_cmd \
|
||||
+ $QEMU_HANDLE \
|
||||
+ '{ "execute": "blockdev-create",
|
||||
+ "arguments": {
|
||||
+ "job-id": "create",
|
||||
+ "options": {
|
||||
+ "driver": "qcow2",
|
||||
+ "file": "file",
|
||||
+ "size": '$((64 * 1024 * 1024))',
|
||||
+ "cluster-size": 512
|
||||
+ } } }' \
|
||||
+ '"concluded"'
|
||||
+
|
||||
+_send_qemu_cmd \
|
||||
+ $QEMU_HANDLE \
|
||||
+ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
|
||||
+ 'return'
|
||||
+
|
||||
+_send_qemu_cmd \
|
||||
+ $QEMU_HANDLE \
|
||||
+ '{ "execute": "quit" }' \
|
||||
+ 'return'
|
||||
+
|
||||
+wait=y _cleanup_qemu
|
||||
+echo
|
||||
+
|
||||
+# Write some data
|
||||
+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
|
||||
+
|
||||
+# Corrupt the image by saying the image header was not allocated
|
||||
+rt_offset=$(peek_file_be "$export_mp" 48 8)
|
||||
+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
|
||||
+poke_file "$export_mp" $rb_offset "\x00\x00"
|
||||
+
|
||||
+# Repairing such a simple case should just work
|
||||
+# (We used to put the reftable at the end of the image file, which can
|
||||
+# never work for non-growable devices.)
|
||||
+echo
|
||||
+TEST_IMG="$export_mp" _check_test_img -r all \
|
||||
+ | grep -v '^Repairing cluster.*refcount=1 reference=0'
|
||||
+
|
||||
+if $loopdev; then
|
||||
+ sudo -n losetup -d "$export_mp"
|
||||
+else
|
||||
+ qsd_pid=$(cat "$TEST_DIR/qsd.pid")
|
||||
+ kill -TERM "$qsd_pid"
|
||||
+ # Wait for process to exit (cannot `wait` because the QSD is daemonized)
|
||||
+ while [ -f "$TEST_DIR/qsd.pid" ]; do
|
||||
+ true
|
||||
+ done
|
||||
+fi
|
||||
+
|
||||
# success, all done
|
||||
echo '*** done'
|
||||
rm -f $seq.full
|
||||
diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out
|
||||
index 75bab8dc84..b5401d788d 100644
|
||||
--- a/tests/qemu-iotests/108.out
|
||||
+++ b/tests/qemu-iotests/108.out
|
||||
@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
|
||||
0 leaked clusters
|
||||
1 corruptions
|
||||
|
||||
+Double checking the fixed image now...
|
||||
+No errors were found on the image.
|
||||
+
|
||||
+=== Check rebuilt reftable location ===
|
||||
+
|
||||
+--- Does the image size increase? ---
|
||||
+
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
|
||||
+wrote 65536/65536 bytes at offset 0
|
||||
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+ERROR cluster 0 refcount=0 reference=1
|
||||
+Rebuilding refcount structure
|
||||
+The following inconsistencies were found and repaired:
|
||||
+
|
||||
+ 0 leaked clusters
|
||||
+ 1 corruptions
|
||||
+
|
||||
+Double checking the fixed image now...
|
||||
+No errors were found on the image.
|
||||
+
|
||||
+OK: Image size did not change
|
||||
+
|
||||
+--- Will the reftable occupy a hole specifically left for it? ---
|
||||
+
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
|
||||
+wrote 8388608/8388608 bytes at offset 0
|
||||
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+discard 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+discard 1024/1024 bytes at offset 4096
|
||||
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+ERROR cluster 0 refcount=0 reference=1
|
||||
+Rebuilding refcount structure
|
||||
+The following inconsistencies were found and repaired:
|
||||
+
|
||||
+ 0 leaked clusters
|
||||
+ 1 corruptions
|
||||
+
|
||||
+Double checking the fixed image now...
|
||||
+No errors were found on the image.
|
||||
+
|
||||
+OK: Reftable is where we expect it
|
||||
+
|
||||
+--- Rebuilding refcount structures on block devices ---
|
||||
+
|
||||
+{ "execute": "qmp_capabilities" }
|
||||
+{"return": {}}
|
||||
+{ "execute": "blockdev-create",
|
||||
+ "arguments": {
|
||||
+ "job-id": "create",
|
||||
+ "options": {
|
||||
+ "driver": "IMGFMT",
|
||||
+ "file": "file",
|
||||
+ "size": 67108864,
|
||||
+ "cluster-size": 512
|
||||
+ } } }
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
|
||||
+{"return": {}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
|
||||
+{ "execute": "job-dismiss", "arguments": { "id": "create" } }
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
|
||||
+{"return": {}}
|
||||
+{ "execute": "quit" }
|
||||
+{"return": {}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
|
||||
+
|
||||
+wrote 65536/65536 bytes at offset 0
|
||||
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+ERROR cluster 0 refcount=0 reference=1
|
||||
+Rebuilding refcount structure
|
||||
+The following inconsistencies were found and repaired:
|
||||
+
|
||||
+ 0 leaked clusters
|
||||
+ 1 corruptions
|
||||
+
|
||||
Double checking the fixed image now...
|
||||
No errors were found on the image.
|
||||
*** done
|
||||
--
|
||||
2.31.1
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue