You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qemu-kvm/SOURCES/kvm-stream-Replace-subtree-...

160 lines
6.0 KiB

From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:04 +0100
Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s)
The subtree drain was introduced in commit b1e1af394d9 as a way to avoid
graph changes between finding the base node and changing the block graph
as necessary on completion of the image streaming job.
The block graph could change between these two points because
bdrv_set_backing_hd() first drains the parent node, which involved
polling and can do anything.
Subtree draining was an imperfect way to make this less likely (because
with it, fewer callbacks are called during this window). Everyone agreed
that it's not really the right solution, and it was only committed as a
stopgap solution.
This replaces the subtree drain with a solution that simply drains the
parent node before we try to find the base node, and then call a version
of bdrv_set_backing_hd() that doesn't drain, but just asserts that the
parent node is already drained.
This way, any graph changes caused by draining happen before we start
looking at the graph and things stay consistent between finding the base
node and changing the graph.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-10-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 17 ++++++++++++++---
block/stream.c | 26 ++++++++++++++++----------
include/block/block-global-state.h | 3 +++
3 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/block.c b/block.c
index b3449a312e..5330e89903 100644
--- a/block.c
+++ b/block.c
@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
}
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp)
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Error **errp)
{
int ret;
Transaction *tran = tran_new();
GLOBAL_STATE_CODE();
- bdrv_drained_begin(bs);
+ assert(bs->quiesce_counter > 0);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
if (ret < 0) {
@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
ret = bdrv_refresh_perms(bs, errp);
out:
tran_finalize(tran, ret);
+ return ret;
+}
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
+{
+ int ret;
+ GLOBAL_STATE_CODE();
+
+ bdrv_drained_begin(bs);
+ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
bdrv_drained_end(bs);
return ret;
diff --git a/block/stream.c b/block/stream.c
index 694709bd25..8744ad103f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -64,13 +64,16 @@ static int stream_prepare(Job *job)
bdrv_cor_filter_drop(s->cor_filter_bs);
s->cor_filter_bs = NULL;
- bdrv_subtree_drained_begin(s->above_base);
+ /*
+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
+ * already here and use bdrv_set_backing_hd_drained() instead because
+ * the polling during drained_begin() might change the graph, and if we do
+ * this only later, we may end up working with the wrong base node (or it
+ * might even have gone away by the time we want to use it).
+ */
+ bdrv_drained_begin(unfiltered_bs);
base = bdrv_filter_or_cow_bs(s->above_base);
- if (base) {
- bdrv_ref(base);
- }
-
unfiltered_base = bdrv_skip_filters(base);
if (bdrv_cow_child(unfiltered_bs)) {
@@ -82,7 +85,13 @@ static int stream_prepare(Job *job)
}
}
- bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
+ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+
+ /*
+ * This call will do I/O, so the graph can change again from here on.
+ * We have already completed the graph change, so we are not in danger
+ * of operating on the wrong node any more if this happens.
+ */
ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
if (local_err) {
error_report_err(local_err);
@@ -92,10 +101,7 @@ static int stream_prepare(Job *job)
}
out:
- if (base) {
- bdrv_unref(base);
- }
- bdrv_subtree_drained_end(s->above_base);
+ bdrv_drained_end(unfiltered_bs);
return ret;
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index c7bd4a2088..00e0cf8aea 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename,
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
--
2.31.1