parent
7bcbf342c2
commit
3ab0440bfc
@ -0,0 +1,89 @@
|
|||||||
|
From d913ecc85156d25f2df5317615eef7144aa26af5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:39 -0400
|
||||||
|
Subject: [PATCH 04/11] migration: Cleanup incoming migration setup state
|
||||||
|
change
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [4/11] d485ab0a9d091ce98f1487fd8a3882f8b0130747
|
||||||
|
|
||||||
|
Destination QEMU can set up incoming ports for two purposes: either a fresh
|
||||||
|
new incoming migration, in which QEMU will switch to SETUP for channel
|
||||||
|
establishment, or a paused postcopy migration, in which QEMU will stay in
|
||||||
|
POSTCOPY_PAUSED until kicking off the RECOVER phase.
|
||||||
|
|
||||||
|
Now the state machine worked on dest node for the latter, only because
|
||||||
|
migrate_set_state() implicitly will become a noop if the current state
|
||||||
|
check failed. It wasn't clear at all.
|
||||||
|
|
||||||
|
Clean it up by providing a helper migration_incoming_state_setup() doing
|
||||||
|
proper checks over current status. Postcopy-paused will be explicitly
|
||||||
|
checked now, and then we can bail out for unknown states.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 4dd5f7b8d568116b3ce594b0055a47c6db50f49c)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
migration/migration.c | 28 ++++++++++++++++++++++++++--
|
||||||
|
1 file changed, 26 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/migration.c b/migration/migration.c
|
||||||
|
index b6cf04e043..21f20a8e1c 100644
|
||||||
|
--- a/migration/migration.c
|
||||||
|
+++ b/migration/migration.c
|
||||||
|
@@ -595,6 +595,29 @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool
|
||||||
|
+migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
|
||||||
|
+{
|
||||||
|
+ MigrationStatus current = mis->state;
|
||||||
|
+
|
||||||
|
+ if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
|
||||||
|
+ /*
|
||||||
|
+ * Incoming postcopy migration will stay in PAUSED state even if
|
||||||
|
+ * reconnection happened.
|
||||||
|
+ */
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (current != MIGRATION_STATUS_NONE) {
|
||||||
|
+ error_setg(errp, "Illegal migration incoming state: %s",
|
||||||
|
+ MigrationStatus_str(current));
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void qemu_start_incoming_migration(const char *uri, bool has_channels,
|
||||||
|
MigrationChannelList *channels,
|
||||||
|
Error **errp)
|
||||||
|
@@ -633,8 +656,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
|
||||||
|
- MIGRATION_STATUS_SETUP);
|
||||||
|
+ if (!migration_incoming_state_setup(mis, errp)) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
|
||||||
|
SocketAddress *saddr = &addr->u.socket;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,157 @@
|
|||||||
|
From 210d413ed90983f8a29576cd13c02b8598dc3b2b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:37 -0400
|
||||||
|
Subject: [PATCH 02/11] migration: Rename thread debug names
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [2/11] b038b81af86b7b18642f81a3e23528912d1bd4ea
|
||||||
|
|
||||||
|
The postcopy thread names on dest QEMU are slightly confusing, partly I'll
|
||||||
|
need to blame myself on 36f62f11e4 ("migration: Postcopy preemption
|
||||||
|
preparation on channel creation"). E.g., "fault-fast" reads like a fast
|
||||||
|
version of "fault-default", but it's actually the fast version of
|
||||||
|
"postcopy/listen".
|
||||||
|
|
||||||
|
Taking this chance, rename all the migration threads with proper rules.
|
||||||
|
Considering we only have 15 chars usable, prefix all threads with "mig/",
|
||||||
|
meanwhile identify src/dst threads properly this time. So now most thread
|
||||||
|
names will look like "mig/DIR/xxx", where DIR will be "src"/"dst", except
|
||||||
|
the bg-snapshot thread which doesn't have a direction.
|
||||||
|
|
||||||
|
For multifd threads, making them "mig/{src|dst}/{send|recv}_%d".
|
||||||
|
|
||||||
|
We used to have "live_migration" thread for a very long time, now it's
|
||||||
|
called "mig/src/main". We may hope to have "mig/dst/main" soon but not
|
||||||
|
yet.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Reviewed-by: Zhijian Li (Fujitsu) <lizhijian@fujitsu.com>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 60ce47675d74ddae3f13a32767d097d9fecbda4b)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
migration/colo.c | 2 +-
|
||||||
|
migration/migration.c | 6 +++---
|
||||||
|
migration/multifd.c | 6 +++---
|
||||||
|
migration/postcopy-ram.c | 4 ++--
|
||||||
|
migration/savevm.c | 2 +-
|
||||||
|
5 files changed, 10 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/colo.c b/migration/colo.c
|
||||||
|
index 84632a603e..560f910fb0 100644
|
||||||
|
--- a/migration/colo.c
|
||||||
|
+++ b/migration/colo.c
|
||||||
|
@@ -938,7 +938,7 @@ int coroutine_fn colo_incoming_co(void)
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
- qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread,
|
||||||
|
+ qemu_thread_create(&th, "mig/dst/colo", colo_process_incoming_thread,
|
||||||
|
mis, QEMU_THREAD_JOINABLE);
|
||||||
|
|
||||||
|
mis->colo_incoming_co = qemu_coroutine_self();
|
||||||
|
diff --git a/migration/migration.c b/migration/migration.c
|
||||||
|
index 86bf76e925..4e9d3522be 100644
|
||||||
|
--- a/migration/migration.c
|
||||||
|
+++ b/migration/migration.c
|
||||||
|
@@ -2447,7 +2447,7 @@ static int open_return_path_on_source(MigrationState *ms)
|
||||||
|
|
||||||
|
trace_open_return_path_on_source();
|
||||||
|
|
||||||
|
- qemu_thread_create(&ms->rp_state.rp_thread, "return path",
|
||||||
|
+ qemu_thread_create(&ms->rp_state.rp_thread, "mig/src/rp-thr",
|
||||||
|
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
|
||||||
|
ms->rp_state.rp_thread_created = true;
|
||||||
|
|
||||||
|
@@ -3755,10 +3755,10 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (migrate_background_snapshot()) {
|
||||||
|
- qemu_thread_create(&s->thread, "bg_snapshot",
|
||||||
|
+ qemu_thread_create(&s->thread, "mig/snapshot",
|
||||||
|
bg_migration_thread, s, QEMU_THREAD_JOINABLE);
|
||||||
|
} else {
|
||||||
|
- qemu_thread_create(&s->thread, "live_migration",
|
||||||
|
+ qemu_thread_create(&s->thread, "mig/src/main",
|
||||||
|
migration_thread, s, QEMU_THREAD_JOINABLE);
|
||||||
|
}
|
||||||
|
s->migration_thread_running = true;
|
||||||
|
diff --git a/migration/multifd.c b/migration/multifd.c
|
||||||
|
index 2802afe79d..8b5be2a17e 100644
|
||||||
|
--- a/migration/multifd.c
|
||||||
|
+++ b/migration/multifd.c
|
||||||
|
@@ -1058,7 +1058,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p,
|
||||||
|
args->p = p;
|
||||||
|
|
||||||
|
p->tls_thread_created = true;
|
||||||
|
- qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker",
|
||||||
|
+ qemu_thread_create(&p->tls_thread, "mig/src/tls",
|
||||||
|
multifd_tls_handshake_thread, args,
|
||||||
|
QEMU_THREAD_JOINABLE);
|
||||||
|
return true;
|
||||||
|
@@ -1184,7 +1184,7 @@ bool multifd_send_setup(void)
|
||||||
|
} else {
|
||||||
|
p->iov = g_new0(struct iovec, page_count);
|
||||||
|
}
|
||||||
|
- p->name = g_strdup_printf("multifdsend_%d", i);
|
||||||
|
+ p->name = g_strdup_printf("mig/src/send_%d", i);
|
||||||
|
p->page_size = qemu_target_page_size();
|
||||||
|
p->page_count = page_count;
|
||||||
|
p->write_flags = 0;
|
||||||
|
@@ -1600,7 +1600,7 @@ int multifd_recv_setup(Error **errp)
|
||||||
|
+ sizeof(uint64_t) * page_count;
|
||||||
|
p->packet = g_malloc0(p->packet_len);
|
||||||
|
}
|
||||||
|
- p->name = g_strdup_printf("multifdrecv_%d", i);
|
||||||
|
+ p->name = g_strdup_printf("mig/dst/recv_%d", i);
|
||||||
|
p->iov = g_new0(struct iovec, page_count);
|
||||||
|
p->normal = g_new0(ram_addr_t, page_count);
|
||||||
|
p->zero = g_new0(ram_addr_t, page_count);
|
||||||
|
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
|
||||||
|
index eccff499cb..ef184d8d08 100644
|
||||||
|
--- a/migration/postcopy-ram.c
|
||||||
|
+++ b/migration/postcopy-ram.c
|
||||||
|
@@ -1238,7 +1238,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- postcopy_thread_create(mis, &mis->fault_thread, "fault-default",
|
||||||
|
+ postcopy_thread_create(mis, &mis->fault_thread, "mig/dst/fault",
|
||||||
|
postcopy_ram_fault_thread, QEMU_THREAD_JOINABLE);
|
||||||
|
mis->have_fault_thread = true;
|
||||||
|
|
||||||
|
@@ -1258,7 +1258,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
|
||||||
|
* This thread needs to be created after the temp pages because
|
||||||
|
* it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
|
||||||
|
*/
|
||||||
|
- postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast",
|
||||||
|
+ postcopy_thread_create(mis, &mis->postcopy_prio_thread, "mig/dst/preempt",
|
||||||
|
postcopy_preempt_thread, QEMU_THREAD_JOINABLE);
|
||||||
|
mis->preempt_thread_status = PREEMPT_THREAD_CREATED;
|
||||||
|
}
|
||||||
|
diff --git a/migration/savevm.c b/migration/savevm.c
|
||||||
|
index e7c1215671..5aa595e365 100644
|
||||||
|
--- a/migration/savevm.c
|
||||||
|
+++ b/migration/savevm.c
|
||||||
|
@@ -2127,7 +2127,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
|
||||||
|
}
|
||||||
|
|
||||||
|
mis->have_listen_thread = true;
|
||||||
|
- postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
|
||||||
|
+ postcopy_thread_create(mis, &mis->listen_thread, "mig/dst/listen",
|
||||||
|
postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
|
||||||
|
trace_loadvm_postcopy_handle_listen("return");
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,146 @@
|
|||||||
|
From 04b4c59f81eb7547c6baa5e269c795b98ddce3ef Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:38 -0400
|
||||||
|
Subject: [PATCH 03/11] migration: Use MigrationStatus instead of int
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [3/11] 47e144753584750732f716b13c172cd67806cb17
|
||||||
|
|
||||||
|
QEMU uses "int" in most cases even if it stores MigrationStatus. I don't
|
||||||
|
know why, so let's try to do that right and see what blows up..
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit a5c24e13e9f176901058b460e61425756322f3e8)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
migration/migration.c | 24 +++++++-----------------
|
||||||
|
migration/migration.h | 9 +++++----
|
||||||
|
2 files changed, 12 insertions(+), 21 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/migration.c b/migration/migration.c
|
||||||
|
index 4e9d3522be..b6cf04e043 100644
|
||||||
|
--- a/migration/migration.c
|
||||||
|
+++ b/migration/migration.c
|
||||||
|
@@ -390,7 +390,7 @@ void migration_incoming_state_destroy(void)
|
||||||
|
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void migrate_generate_event(int new_state)
|
||||||
|
+static void migrate_generate_event(MigrationStatus new_state)
|
||||||
|
{
|
||||||
|
if (migrate_events()) {
|
||||||
|
qapi_event_send_migration(new_state);
|
||||||
|
@@ -1294,8 +1294,6 @@ static void fill_destination_migration_info(MigrationInfo *info)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mis->state) {
|
||||||
|
- case MIGRATION_STATUS_NONE:
|
||||||
|
- return;
|
||||||
|
case MIGRATION_STATUS_SETUP:
|
||||||
|
case MIGRATION_STATUS_CANCELLING:
|
||||||
|
case MIGRATION_STATUS_CANCELLED:
|
||||||
|
@@ -1311,6 +1309,8 @@ static void fill_destination_migration_info(MigrationInfo *info)
|
||||||
|
info->has_status = true;
|
||||||
|
fill_destination_postcopy_migration_info(info);
|
||||||
|
break;
|
||||||
|
+ default:
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
info->status = mis->state;
|
||||||
|
}
|
||||||
|
@@ -1349,7 +1349,8 @@ void qmp_migrate_start_postcopy(Error **errp)
|
||||||
|
|
||||||
|
/* shared migration helpers */
|
||||||
|
|
||||||
|
-void migrate_set_state(int *state, int old_state, int new_state)
|
||||||
|
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
|
||||||
|
+ MigrationStatus new_state)
|
||||||
|
{
|
||||||
|
assert(new_state < MIGRATION_STATUS__MAX);
|
||||||
|
if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
|
||||||
|
@@ -1555,7 +1556,7 @@ bool migration_in_postcopy(void)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-bool migration_postcopy_is_alive(int state)
|
||||||
|
+bool migration_postcopy_is_alive(MigrationStatus state)
|
||||||
|
{
|
||||||
|
switch (state) {
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
@@ -1600,20 +1601,9 @@ bool migration_is_idle(void)
|
||||||
|
case MIGRATION_STATUS_COMPLETED:
|
||||||
|
case MIGRATION_STATUS_FAILED:
|
||||||
|
return true;
|
||||||
|
- case MIGRATION_STATUS_SETUP:
|
||||||
|
- case MIGRATION_STATUS_CANCELLING:
|
||||||
|
- case MIGRATION_STATUS_ACTIVE:
|
||||||
|
- case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
- case MIGRATION_STATUS_COLO:
|
||||||
|
- case MIGRATION_STATUS_PRE_SWITCHOVER:
|
||||||
|
- case MIGRATION_STATUS_DEVICE:
|
||||||
|
- case MIGRATION_STATUS_WAIT_UNPLUG:
|
||||||
|
+ default:
|
||||||
|
return false;
|
||||||
|
- case MIGRATION_STATUS__MAX:
|
||||||
|
- g_assert_not_reached();
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool migration_is_active(void)
|
||||||
|
diff --git a/migration/migration.h b/migration/migration.h
|
||||||
|
index 8045e39c26..bc9c802595 100644
|
||||||
|
--- a/migration/migration.h
|
||||||
|
+++ b/migration/migration.h
|
||||||
|
@@ -160,7 +160,7 @@ struct MigrationIncomingState {
|
||||||
|
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
|
||||||
|
GArray *postcopy_remote_fds;
|
||||||
|
|
||||||
|
- int state;
|
||||||
|
+ MigrationStatus state;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The incoming migration coroutine, non-NULL during qemu_loadvm_state().
|
||||||
|
@@ -298,7 +298,7 @@ struct MigrationState {
|
||||||
|
/* params from 'migrate-set-parameters' */
|
||||||
|
MigrationParameters parameters;
|
||||||
|
|
||||||
|
- int state;
|
||||||
|
+ MigrationStatus state;
|
||||||
|
|
||||||
|
/* State related to return path */
|
||||||
|
struct {
|
||||||
|
@@ -467,7 +467,8 @@ struct MigrationState {
|
||||||
|
bool rdma_migration;
|
||||||
|
};
|
||||||
|
|
||||||
|
-void migrate_set_state(int *state, int old_state, int new_state);
|
||||||
|
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
|
||||||
|
+ MigrationStatus new_state);
|
||||||
|
|
||||||
|
void migration_fd_process_incoming(QEMUFile *f);
|
||||||
|
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
|
||||||
|
@@ -487,7 +488,7 @@ int migrate_init(MigrationState *s, Error **errp);
|
||||||
|
bool migration_is_blocked(Error **errp);
|
||||||
|
/* True if outgoing migration has entered postcopy phase */
|
||||||
|
bool migration_in_postcopy(void);
|
||||||
|
-bool migration_postcopy_is_alive(int state);
|
||||||
|
+bool migration_postcopy_is_alive(MigrationStatus state);
|
||||||
|
MigrationState *migrate_get_current(void);
|
||||||
|
bool migration_has_failed(MigrationState *);
|
||||||
|
bool migrate_mode_is_cpr(MigrationState *);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,94 @@
|
|||||||
|
From a35f4af0c143c0b6655bb1123e1734a5a9dd890e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:41 -0400
|
||||||
|
Subject: [PATCH 06/11] migration/docs: Update postcopy recover session for
|
||||||
|
SETUP phase
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [6/11] f84c228f019a30f23313cbfe7cb39ca8aa0aee84
|
||||||
|
|
||||||
|
Firstly, the "Paused" state was added in the wrong place before. The state
|
||||||
|
machine section was describing PostcopyState, rather than MigrationStatus.
|
||||||
|
Drop the Paused state descriptions.
|
||||||
|
|
||||||
|
Then in the postcopy recover session, add more information on the state
|
||||||
|
machine for MigrationStatus in the lines. Add the new RECOVER_SETUP phase.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
[fix typo s/reconnects/reconnect]
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 21e89f7ad526f0dddfc722e615bfb0fcdb705c87)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
docs/devel/migration/postcopy.rst | 31 ++++++++++++++++---------------
|
||||||
|
1 file changed, 16 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst
|
||||||
|
index 6c51e96d79..82e7a848c6 100644
|
||||||
|
--- a/docs/devel/migration/postcopy.rst
|
||||||
|
+++ b/docs/devel/migration/postcopy.rst
|
||||||
|
@@ -99,17 +99,6 @@ ADVISE->DISCARD->LISTEN->RUNNING->END
|
||||||
|
(although it can't do the cleanup it would do as it
|
||||||
|
finishes a normal migration).
|
||||||
|
|
||||||
|
- - Paused
|
||||||
|
-
|
||||||
|
- Postcopy can run into a paused state (normally on both sides when
|
||||||
|
- happens), where all threads will be temporarily halted mostly due to
|
||||||
|
- network errors. When reaching paused state, migration will make sure
|
||||||
|
- the qemu binary on both sides maintain the data without corrupting
|
||||||
|
- the VM. To continue the migration, the admin needs to fix the
|
||||||
|
- migration channel using the QMP command 'migrate-recover' on the
|
||||||
|
- destination node, then resume the migration using QMP command 'migrate'
|
||||||
|
- again on source node, with resume=true flag set.
|
||||||
|
-
|
||||||
|
- End
|
||||||
|
|
||||||
|
The listen thread can now quit, and perform the cleanup of migration
|
||||||
|
@@ -221,7 +210,8 @@ paused postcopy migration.
|
||||||
|
|
||||||
|
The recovery phase normally contains a few steps:
|
||||||
|
|
||||||
|
- - When network issue occurs, both QEMU will go into PAUSED state
|
||||||
|
+ - When network issue occurs, both QEMU will go into **POSTCOPY_PAUSED**
|
||||||
|
+ migration state.
|
||||||
|
|
||||||
|
- When the network is recovered (or a new network is provided), the admin
|
||||||
|
can setup the new channel for migration using QMP command
|
||||||
|
@@ -229,9 +219,20 @@ The recovery phase normally contains a few steps:
|
||||||
|
|
||||||
|
- On source host, the admin can continue the interrupted postcopy
|
||||||
|
migration using QMP command 'migrate' with resume=true flag set.
|
||||||
|
-
|
||||||
|
- - After the connection is re-established, QEMU will continue the postcopy
|
||||||
|
- migration on both sides.
|
||||||
|
+ Source QEMU will go into **POSTCOPY_RECOVER_SETUP** state trying to
|
||||||
|
+ re-establish the channels.
|
||||||
|
+
|
||||||
|
+ - When both sides of QEMU successfully reconnect using a new or fixed up
|
||||||
|
+ channel, they will go into **POSTCOPY_RECOVER** state, some handshake
|
||||||
|
+ procedure will be needed to properly synchronize the VM states between
|
||||||
|
+ the two QEMUs to continue the postcopy migration. For example, there
|
||||||
|
+ can be pages sent right during the window when the network is
|
||||||
|
+ interrupted, then the handshake will guarantee pages lost in-flight
|
||||||
|
+ will be resent again.
|
||||||
|
+
|
||||||
|
+ - After a proper handshake synchronization, QEMU will continue the
|
||||||
|
+ postcopy migration on both sides and go back to **POSTCOPY_ACTIVE**
|
||||||
|
+ state. Postcopy migration will continue.
|
||||||
|
|
||||||
|
During a paused postcopy migration, the VM can logically still continue
|
||||||
|
running, and it will not be impacted from any page access to pages that
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
From ee276dfcc7d4b25214ec6745ebf55c4666b3bd0a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:36 -0400
|
||||||
|
Subject: [PATCH 01/11] migration/multifd: Avoid the final FLUSH in complete()
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [1/11] 028e310f65eaad098ef62bdb8a5d30b9a5cd32e2
|
||||||
|
|
||||||
|
We always do the flush when finishing one round of scan, and during
|
||||||
|
complete() phase we should scan one more round making sure no dirty page
|
||||||
|
existed. In that case we shouldn't need one explicit FLUSH at the end of
|
||||||
|
complete(), as when reaching there all pages should have been flushed.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Tested-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 637280aeb242517ede480aa2d5ba1c29d41eac11)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
migration/ram.c | 4 ----
|
||||||
|
1 file changed, 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/ram.c b/migration/ram.c
|
||||||
|
index 8deb84984f..3ef84e7036 100644
|
||||||
|
--- a/migration/ram.c
|
||||||
|
+++ b/migration/ram.c
|
||||||
|
@@ -3383,10 +3383,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (migrate_multifd() && !migrate_multifd_flush_after_each_section() &&
|
||||||
|
- !migrate_mapped_ram()) {
|
||||||
|
- qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
|
||||||
|
- }
|
||||||
|
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
|
||||||
|
return qemu_fflush(f);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,280 @@
|
|||||||
|
From c9eb5f8e86d031060c72aeb9d995844c6f842c58 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:40 -0400
|
||||||
|
Subject: [PATCH 05/11] migration/postcopy: Add postcopy-recover-setup phase
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [5/11] ce81d3b247b9f9541a75265a07082394ce419f3a
|
||||||
|
|
||||||
|
This patch adds a migration state on src called "postcopy-recover-setup".
|
||||||
|
The new state will describe the intermediate step starting from when the
|
||||||
|
src QEMU received a postcopy recovery request, until the migration channels
|
||||||
|
are properly established, but before the recovery process takes place.
|
||||||
|
|
||||||
|
The request came from Libvirt where Libvirt currently relies on the migration
|
||||||
|
state events to detect migration state changes. That works for most of the
|
||||||
|
migration process, except for postcopy recovery failures at the beginning.
|
||||||
|
|
||||||
|
Currently postcopy recovery only has two major states:
|
||||||
|
|
||||||
|
- postcopy-paused: this is the state that both sides of QEMU will be in
|
||||||
|
for a long time as long as the migration channel was interrupted.
|
||||||
|
|
||||||
|
- postcopy-recover: this is the state where both sides of QEMU handshake
|
||||||
|
with each other, preparing for a continuation of postcopy which used to
|
||||||
|
be interrupted.
|
||||||
|
|
||||||
|
The issue here is when the recovery port is invalid, the src QEMU will take
|
||||||
|
the URI/channels, noticing the ports are not valid, and it'll silently stay
|
||||||
|
in the postcopy-paused state, with no event sent to Libvirt. In this case,
|
||||||
|
the only thing Libvirt can do is to poll the migration status with a proper
|
||||||
|
interval, however that's less optimal.
|
||||||
|
|
||||||
|
Considering that this is the only case where Libvirt won't get a
|
||||||
|
notification from QEMU on such events, let's add postcopy-recover-setup
|
||||||
|
state to mimic what we have with the "setup" state of a newly initialized
|
||||||
|
migration, describing the phase of connection establishment.
|
||||||
|
|
||||||
|
With that, postcopy recovery will have two paths to go now, and either path
|
||||||
|
will guarantee an event generated. Now the events will look like this
|
||||||
|
during a recovery process on src QEMU:
|
||||||
|
|
||||||
|
- Initially when the recovery is initiated on src, QEMU will go from
|
||||||
|
"postcopy-paused" -> "postcopy-recover-setup". Old QEMUs don't have
|
||||||
|
this event.
|
||||||
|
|
||||||
|
- Depending on whether the channel re-establishment succeeded:
|
||||||
|
|
||||||
|
- In succeeded case, src QEMU will move from "postcopy-recover-setup"
|
||||||
|
to "postcopy-recover". Old QEMUs also have this event.
|
||||||
|
|
||||||
|
- In failure case, src QEMU will move from "postcopy-recover-setup" to
|
||||||
|
"postcopy-paused" again. Old QEMUs don't have this event.
|
||||||
|
|
||||||
|
This guarantees that Libvirt will always receive a notification for
|
||||||
|
recovery process properly.
|
||||||
|
|
||||||
|
One thing to mention is, such new status is only needed on src QEMU not
|
||||||
|
both. On dest QEMU, the state machine doesn't change. Hence the events
|
||||||
|
don't change either. It's done like so because dest QEMU may not have an
|
||||||
|
explicit point of setup start.  E.g., it can happen that when dest QEMU
|
||||||
|
doesn't use migrate-recover command to use a new URI/channel, but the old
|
||||||
|
URI/channels can be reused in recovery, in which case the old ports simply
|
||||||
|
can work again after the network routes are fixed up.
|
||||||
|
|
||||||
|
Add a new helper postcopy_is_paused() detecting whether postcopy is still
|
||||||
|
paused, taking RECOVER_SETUP into account too. When using it on both
|
||||||
|
src/dst, a slight change is done altogether to always wait for the
|
||||||
|
semaphore before checking the status, because for both sides a sem_post()
|
||||||
|
will be required for a recovery.
|
||||||
|
|
||||||
|
Cc: Jiri Denemark <jdenemar@redhat.com>
|
||||||
|
Cc: Prasad Pandit <ppandit@redhat.com>
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Buglink: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 4146b77ec7640d3c30d42558e13423594b114385)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
migration/migration.c | 40 ++++++++++++++++++++++++++++++++++------
|
||||||
|
migration/postcopy-ram.c | 6 ++++++
|
||||||
|
migration/postcopy-ram.h | 3 +++
|
||||||
|
migration/savevm.c | 4 ++--
|
||||||
|
qapi/migration.json | 4 ++++
|
||||||
|
5 files changed, 49 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/migration.c b/migration/migration.c
|
||||||
|
index 21f20a8e1c..03e151a045 100644
|
||||||
|
--- a/migration/migration.c
|
||||||
|
+++ b/migration/migration.c
|
||||||
|
@@ -1100,6 +1100,7 @@ bool migration_is_setup_or_active(void)
|
||||||
|
case MIGRATION_STATUS_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_PAUSED:
|
||||||
|
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_RECOVER:
|
||||||
|
case MIGRATION_STATUS_SETUP:
|
||||||
|
case MIGRATION_STATUS_PRE_SWITCHOVER:
|
||||||
|
@@ -1122,6 +1123,7 @@ bool migration_is_running(void)
|
||||||
|
case MIGRATION_STATUS_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_PAUSED:
|
||||||
|
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_RECOVER:
|
||||||
|
case MIGRATION_STATUS_SETUP:
|
||||||
|
case MIGRATION_STATUS_PRE_SWITCHOVER:
|
||||||
|
@@ -1273,6 +1275,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||||
|
case MIGRATION_STATUS_PRE_SWITCHOVER:
|
||||||
|
case MIGRATION_STATUS_DEVICE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_PAUSED:
|
||||||
|
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_RECOVER:
|
||||||
|
/* TODO add some postcopy stats */
|
||||||
|
populate_time_info(info, s);
|
||||||
|
@@ -1469,10 +1472,31 @@ static void migrate_error_free(MigrationState *s)
|
||||||
|
|
||||||
|
static void migrate_fd_error(MigrationState *s, const Error *error)
|
||||||
|
{
|
||||||
|
+ MigrationStatus current = s->state;
|
||||||
|
+ MigrationStatus next;
|
||||||
|
+
|
||||||
|
trace_migrate_fd_error(error_get_pretty(error));
|
||||||
|
assert(s->to_dst_file == NULL);
|
||||||
|
- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
|
||||||
|
- MIGRATION_STATUS_FAILED);
|
||||||
|
+
|
||||||
|
+ switch (current) {
|
||||||
|
+ case MIGRATION_STATUS_SETUP:
|
||||||
|
+ next = MIGRATION_STATUS_FAILED;
|
||||||
|
+ break;
|
||||||
|
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
|
||||||
|
+ /* Never fail a postcopy migration; switch back to PAUSED instead */
|
||||||
|
+ next = MIGRATION_STATUS_POSTCOPY_PAUSED;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ /*
|
||||||
|
+ * This really shouldn't happen. Just be careful to not crash a VM
|
||||||
|
+ * just for this. Instead, dump something.
|
||||||
|
+ */
|
||||||
|
+ error_report("%s: Illegal migration status (%s) detected",
|
||||||
|
+ __func__, MigrationStatus_str(current));
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ migrate_set_state(&s->state, current, next);
|
||||||
|
migrate_set_error(s, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1573,6 +1597,7 @@ bool migration_in_postcopy(void)
|
||||||
|
switch (s->state) {
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_PAUSED:
|
||||||
|
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_RECOVER:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
@@ -1965,6 +1990,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
|
||||||
|
+ MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
|
||||||
|
+
|
||||||
|
/* This is a resume, skip init status */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@@ -3020,9 +3048,9 @@ static MigThrError postcopy_pause(MigrationState *s)
|
||||||
|
* We wait until things fixed up. Then someone will setup the
|
||||||
|
* status back for us.
|
||||||
|
*/
|
||||||
|
- while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
|
||||||
|
+ do {
|
||||||
|
qemu_sem_wait(&s->postcopy_pause_sem);
|
||||||
|
- }
|
||||||
|
+ } while (postcopy_is_paused(s->state));
|
||||||
|
|
||||||
|
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
|
||||||
|
/* Woken up by a recover procedure. Give it a shot */
|
||||||
|
@@ -3687,7 +3715,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
|
||||||
|
{
|
||||||
|
Error *local_err = NULL;
|
||||||
|
uint64_t rate_limit;
|
||||||
|
- bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
|
||||||
|
+ bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -3754,7 +3782,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
|
||||||
|
|
||||||
|
if (resume) {
|
||||||
|
/* Wakeup the main migration thread to do the recovery */
|
||||||
|
- migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
|
||||||
|
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
|
||||||
|
MIGRATION_STATUS_POSTCOPY_RECOVER);
|
||||||
|
qemu_sem_post(&s->postcopy_pause_sem);
|
||||||
|
return;
|
||||||
|
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
|
||||||
|
index ef184d8d08..be10611048 100644
|
||||||
|
--- a/migration/postcopy-ram.c
|
||||||
|
+++ b/migration/postcopy-ram.c
|
||||||
|
@@ -1770,3 +1770,9 @@ void *postcopy_preempt_thread(void *opaque)
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+bool postcopy_is_paused(MigrationStatus status)
|
||||||
|
+{
|
||||||
|
+ return status == MIGRATION_STATUS_POSTCOPY_PAUSED ||
|
||||||
|
+ status == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP;
|
||||||
|
+}
|
||||||
|
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
|
||||||
|
index ecae941211..a6df1b2811 100644
|
||||||
|
--- a/migration/postcopy-ram.h
|
||||||
|
+++ b/migration/postcopy-ram.h
|
||||||
|
@@ -13,6 +13,8 @@
|
||||||
|
#ifndef QEMU_POSTCOPY_RAM_H
|
||||||
|
#define QEMU_POSTCOPY_RAM_H
|
||||||
|
|
||||||
|
+#include "qapi/qapi-types-migration.h"
|
||||||
|
+
|
||||||
|
/* Return true if the host supports everything we need to do postcopy-ram */
|
||||||
|
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
|
||||||
|
Error **errp);
|
||||||
|
@@ -193,5 +195,6 @@ enum PostcopyChannels {
|
||||||
|
void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
|
||||||
|
void postcopy_preempt_setup(MigrationState *s);
|
||||||
|
int postcopy_preempt_establish_channel(MigrationState *s);
|
||||||
|
+bool postcopy_is_paused(MigrationStatus status);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
diff --git a/migration/savevm.c b/migration/savevm.c
|
||||||
|
index 5aa595e365..a0f7a9dceb 100644
|
||||||
|
--- a/migration/savevm.c
|
||||||
|
+++ b/migration/savevm.c
|
||||||
|
@@ -2860,9 +2860,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
|
||||||
|
error_report("Detected IO failure for postcopy. "
|
||||||
|
"Migration paused.");
|
||||||
|
|
||||||
|
- while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
|
||||||
|
+ do {
|
||||||
|
qemu_sem_wait(&mis->postcopy_pause_sem_dst);
|
||||||
|
- }
|
||||||
|
+ } while (postcopy_is_paused(mis->state));
|
||||||
|
|
||||||
|
trace_postcopy_pause_incoming_continued();
|
||||||
|
|
||||||
|
diff --git a/qapi/migration.json b/qapi/migration.json
|
||||||
|
index 8c65b90328..e518563f67 100644
|
||||||
|
--- a/qapi/migration.json
|
||||||
|
+++ b/qapi/migration.json
|
||||||
|
@@ -150,6 +150,9 @@
|
||||||
|
#
|
||||||
|
# @postcopy-paused: during postcopy but paused. (since 3.0)
|
||||||
|
#
|
||||||
|
+# @postcopy-recover-setup: setup phase for a postcopy recovery process,
|
||||||
|
+# preparing for a recovery phase to start. (since 9.1)
|
||||||
|
+#
|
||||||
|
# @postcopy-recover: trying to recover from a paused postcopy. (since
|
||||||
|
# 3.0)
|
||||||
|
#
|
||||||
|
@@ -174,6 +177,7 @@
|
||||||
|
{ 'enum': 'MigrationStatus',
|
||||||
|
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
|
||||||
|
'active', 'postcopy-active', 'postcopy-paused',
|
||||||
|
+ 'postcopy-recover-setup',
|
||||||
|
'postcopy-recover', 'completed', 'failed', 'colo',
|
||||||
|
'pre-switchover', 'device', 'wait-unplug' ] }
|
||||||
|
##
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,69 @@
|
|||||||
|
From 16ba989e9c7606719bb1ab4d5511bac6c2c0d625 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:43 -0400
|
||||||
|
Subject: [PATCH 08/11] tests/migration-tests: Always enable migration events
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [8/11] 02182a6c1e9b492ca90b86e0568657c55bac121d
|
||||||
|
|
||||||
|
Libvirt should always enable it, so it'll be nice for qtest to also cover that for
|
||||||
|
all tests on both sides. migrate_incoming_qmp() used to enable it only on
|
||||||
|
dst, now we enable them on both, as we'll start to sanity check events even
|
||||||
|
on the src QEMU.
|
||||||
|
|
||||||
|
We'll need to leave the one in migrate_incoming_qmp(), because
|
||||||
|
virtio-net-failover test uses that one only, and it relies on the events to
|
||||||
|
work.
|
||||||
|
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit cd313b66f203381f2f2f984d5155d7942d26725d)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
tests/qtest/migration-helpers.c | 1 +
|
||||||
|
tests/qtest/migration-test.c | 7 +++++++
|
||||||
|
2 files changed, 8 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
|
||||||
|
index e451dbdbed..50a6bc2569 100644
|
||||||
|
--- a/tests/qtest/migration-helpers.c
|
||||||
|
+++ b/tests/qtest/migration-helpers.c
|
||||||
|
@@ -107,6 +107,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
|
||||||
|
g_assert(!qdict_haskey(args, "uri"));
|
||||||
|
qdict_put_str(args, "uri", uri);
|
||||||
|
|
||||||
|
+ /* This function relies on the event to work, make sure it's enabled */
|
||||||
|
migrate_set_capability(to, "events", true);
|
||||||
|
|
||||||
|
rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
|
||||||
|
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
|
||||||
|
index 0808300f5b..9f29f4e4f3 100644
|
||||||
|
--- a/tests/qtest/migration-test.c
|
||||||
|
+++ b/tests/qtest/migration-test.c
|
||||||
|
@@ -908,6 +908,13 @@ static int test_migrate_start(QTestState **from, QTestState **to,
|
||||||
|
unlink(shmem_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Always enable migration events. Libvirt always uses it, let's try
|
||||||
|
+ * to mimic that as closely as possible.
|
||||||
|
+ */
|
||||||
|
+ migrate_set_capability(*from, "events", true);
|
||||||
|
+ migrate_set_capability(*to, "events", true);
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,219 @@
|
|||||||
|
From e503d6466ec8dd6c51b5891bd52f6f4076210f8b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:46 -0400
|
||||||
|
Subject: [PATCH 11/11] tests/migration-tests: Cover postcopy failure on
|
||||||
|
reconnect
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [11/11] 0295f627c0e8e7bae6dcd695c063b17717c7590f
|
||||||
|
|
||||||
|
Make sure there will be an event for postcopy recovery, regardless of
|
||||||
|
whether the reconnect will succeed, or when the failure happens.
|
||||||
|
|
||||||
|
The added new case is to fail early in postcopy recovery, in which case it
|
||||||
|
didn't even reach RECOVER stage on src (and in real life it'll be the same
|
||||||
|
to dest, but the test case is just slightly more involved due to the dual
|
||||||
|
socketpair setup).
|
||||||
|
|
||||||
|
To do that, rename the postcopy_recovery_test_fail to reflect either stage
|
||||||
|
to fail, instead of a boolean.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 6cf56a87baf8b99c4296a943d220eb8276ca035a)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
tests/qtest/migration-test.c | 95 +++++++++++++++++++++++++++++-------
|
||||||
|
1 file changed, 77 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
|
||||||
|
index afe8270dd0..d903e3e0fa 100644
|
||||||
|
--- a/tests/qtest/migration-test.c
|
||||||
|
+++ b/tests/qtest/migration-test.c
|
||||||
|
@@ -74,6 +74,17 @@ static QTestMigrationState dst_state;
|
||||||
|
#define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC"
|
||||||
|
#define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST"
|
||||||
|
|
||||||
|
+typedef enum PostcopyRecoveryFailStage {
|
||||||
|
+ /*
|
||||||
|
+ * "no failure" must be 0 as it's the default. OTOH, real failure
|
||||||
|
+ * cases must be >0 to make sure they trigger by a "if" test.
|
||||||
|
+ */
|
||||||
|
+ POSTCOPY_FAIL_NONE = 0,
|
||||||
|
+ POSTCOPY_FAIL_CHANNEL_ESTABLISH,
|
||||||
|
+ POSTCOPY_FAIL_RECOVERY,
|
||||||
|
+ POSTCOPY_FAIL_MAX
|
||||||
|
+} PostcopyRecoveryFailStage;
|
||||||
|
+
|
||||||
|
#if defined(__linux__)
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/vfs.h>
|
||||||
|
@@ -753,7 +764,7 @@ typedef struct {
|
||||||
|
/* Postcopy specific fields */
|
||||||
|
void *postcopy_data;
|
||||||
|
bool postcopy_preempt;
|
||||||
|
- bool postcopy_recovery_test_fail;
|
||||||
|
+ PostcopyRecoveryFailStage postcopy_recovery_fail_stage;
|
||||||
|
} MigrateCommon;
|
||||||
|
|
||||||
|
static int test_migrate_start(QTestState **from, QTestState **to,
|
||||||
|
@@ -1467,12 +1478,16 @@ static void wait_for_postcopy_status(QTestState *one, const char *status)
|
||||||
|
"completed", NULL });
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
+static void postcopy_recover_fail(QTestState *from, QTestState *to,
|
||||||
|
+ PostcopyRecoveryFailStage stage)
|
||||||
|
{
|
||||||
|
#ifndef _WIN32
|
||||||
|
+ bool fail_early = (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH);
|
||||||
|
int ret, pair1[2], pair2[2];
|
||||||
|
char c;
|
||||||
|
|
||||||
|
+ g_assert(stage > POSTCOPY_FAIL_NONE && stage < POSTCOPY_FAIL_MAX);
|
||||||
|
+
|
||||||
|
/* Create two unrelated socketpairs */
|
||||||
|
ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1);
|
||||||
|
g_assert_cmpint(ret, ==, 0);
|
||||||
|
@@ -1506,6 +1521,14 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
ret = send(pair2[1], &c, 1, 0);
|
||||||
|
g_assert_cmpint(ret, ==, 1);
|
||||||
|
|
||||||
|
+ if (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH) {
|
||||||
|
+ /*
|
||||||
|
+ * This will make src QEMU fail at an early stage when trying to
|
||||||
|
+ * resume later, where it shouldn't reach RECOVER stage at all.
|
||||||
|
+ */
|
||||||
|
+ close(pair1[1]);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
migrate_recover(to, "fd:fd-mig");
|
||||||
|
migrate_qmp(from, "fd:fd-mig", "{'resume': true}");
|
||||||
|
|
||||||
|
@@ -1515,28 +1538,53 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
*/
|
||||||
|
migration_event_wait(from, "postcopy-recover-setup");
|
||||||
|
|
||||||
|
+ if (fail_early) {
|
||||||
|
+ /*
|
||||||
|
+ * When it fails at reconnection, src QEMU will automatically go
|
||||||
|
+ * back to PAUSED state. Making sure there is an event in this
|
||||||
|
+ * case: Libvirt relies on this to detect early reconnection
|
||||||
|
+ * errors.
|
||||||
|
+ */
|
||||||
|
+ migration_event_wait(from, "postcopy-paused");
|
||||||
|
+ } else {
|
||||||
|
+ /*
|
||||||
|
+ * We want to test "fail later" at RECOVER stage here. Make sure
|
||||||
|
+ * both QEMU instances will go into RECOVER stage first, then test
|
||||||
|
+ * kicking them out using migrate-pause.
|
||||||
|
+ *
|
||||||
|
+ * Explicitly check the RECOVER event on src, that's what Libvirt
|
||||||
|
+ * relies on, rather than polling.
|
||||||
|
+ */
|
||||||
|
+ migration_event_wait(from, "postcopy-recover");
|
||||||
|
+ wait_for_postcopy_status(from, "postcopy-recover");
|
||||||
|
+
|
||||||
|
+ /* Need an explicit kick on src QEMU in this case */
|
||||||
|
+ migrate_pause(from);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
- * Make sure both QEMU instances will go into RECOVER stage, then test
|
||||||
|
- * kicking them out using migrate-pause.
|
||||||
|
+ * For all failure cases, we'll reach such states on both sides now.
|
||||||
|
+ * Check them.
|
||||||
|
*/
|
||||||
|
- wait_for_postcopy_status(from, "postcopy-recover");
|
||||||
|
+ wait_for_postcopy_status(from, "postcopy-paused");
|
||||||
|
wait_for_postcopy_status(to, "postcopy-recover");
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * This would be issued by the admin upon noticing the hang, we should
|
||||||
|
- * make sure we're able to kick this out.
|
||||||
|
+ * Kick dest QEMU out too. This is normally not needed in reality
|
||||||
|
+ * because when the channel is shutdown it should also happen on src.
|
||||||
|
+ * However here we used separate socket pairs so we need to do that
|
||||||
|
+ * explicitly.
|
||||||
|
*/
|
||||||
|
- migrate_pause(from);
|
||||||
|
- wait_for_postcopy_status(from, "postcopy-paused");
|
||||||
|
-
|
||||||
|
- /* Do the same test on dest */
|
||||||
|
migrate_pause(to);
|
||||||
|
wait_for_postcopy_status(to, "postcopy-paused");
|
||||||
|
|
||||||
|
close(pair1[0]);
|
||||||
|
- close(pair1[1]);
|
||||||
|
close(pair2[0]);
|
||||||
|
close(pair2[1]);
|
||||||
|
+
|
||||||
|
+ if (stage != POSTCOPY_FAIL_CHANNEL_ESTABLISH) {
|
||||||
|
+ close(pair1[1]);
|
||||||
|
+ }
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1578,12 +1626,12 @@ static void test_postcopy_recovery_common(MigrateCommon *args)
|
||||||
|
wait_for_postcopy_status(to, "postcopy-paused");
|
||||||
|
wait_for_postcopy_status(from, "postcopy-paused");
|
||||||
|
|
||||||
|
- if (args->postcopy_recovery_test_fail) {
|
||||||
|
+ if (args->postcopy_recovery_fail_stage) {
|
||||||
|
/*
|
||||||
|
* Test when a wrong socket specified for recover, and then the
|
||||||
|
* ability to kick it out, and continue with a correct socket.
|
||||||
|
*/
|
||||||
|
- postcopy_recover_fail(from, to);
|
||||||
|
+ postcopy_recover_fail(from, to, args->postcopy_recovery_fail_stage);
|
||||||
|
/* continue with a good recovery */
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1623,10 +1671,19 @@ static void test_postcopy_recovery_compress(void)
|
||||||
|
test_postcopy_recovery_common(&args);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void test_postcopy_recovery_double_fail(void)
|
||||||
|
+static void test_postcopy_recovery_fail_handshake(void)
|
||||||
|
+{
|
||||||
|
+ MigrateCommon args = {
|
||||||
|
+ .postcopy_recovery_fail_stage = POSTCOPY_FAIL_RECOVERY,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ test_postcopy_recovery_common(&args);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void test_postcopy_recovery_fail_reconnect(void)
|
||||||
|
{
|
||||||
|
MigrateCommon args = {
|
||||||
|
- .postcopy_recovery_test_fail = true,
|
||||||
|
+ .postcopy_recovery_fail_stage = POSTCOPY_FAIL_CHANNEL_ESTABLISH,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_postcopy_recovery_common(&args);
|
||||||
|
@@ -3604,8 +3661,10 @@ int main(int argc, char **argv)
|
||||||
|
migration_test_add("/migration/postcopy/recovery/compress/plain",
|
||||||
|
test_postcopy_recovery_compress);
|
||||||
|
}
|
||||||
|
- migration_test_add("/migration/postcopy/recovery/double-failures",
|
||||||
|
- test_postcopy_recovery_double_fail);
|
||||||
|
+ migration_test_add("/migration/postcopy/recovery/double-failures/handshake",
|
||||||
|
+ test_postcopy_recovery_fail_handshake);
|
||||||
|
+ migration_test_add("/migration/postcopy/recovery/double-failures/reconnect",
|
||||||
|
+ test_postcopy_recovery_fail_reconnect);
|
||||||
|
if (is_x86) {
|
||||||
|
migration_test_add("/migration/postcopy/suspend",
|
||||||
|
test_postcopy_suspend);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,102 @@
|
|||||||
|
From 13f85a7187ed25b41c6064b94fdcc798e3bf61a0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:42 -0400
|
||||||
|
Subject: [PATCH 07/11] tests/migration-tests: Drop most WIN32 ifdefs for
|
||||||
|
postcopy failure tests
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [7/11] 86500403d1f4baef170d3bc6f6a9cd34862c9076
|
||||||
|
|
||||||
|
Most of them are not needed, we can stick with one ifdef inside
|
||||||
|
postcopy_recover_fail() so as to cover the scm right tricks only.
|
||||||
|
The tests won't run on Windows anyway due to has_uffd always being false.
|
||||||
|
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 0fd397359540a6622c5f2164e76fc2cefd811f2a)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
tests/qtest/migration-test.c | 10 ++--------
|
||||||
|
1 file changed, 2 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
|
||||||
|
index 1d2cee87ea..0808300f5b 100644
|
||||||
|
--- a/tests/qtest/migration-test.c
|
||||||
|
+++ b/tests/qtest/migration-test.c
|
||||||
|
@@ -1460,9 +1460,9 @@ static void wait_for_postcopy_status(QTestState *one, const char *status)
|
||||||
|
"completed", NULL });
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifndef _WIN32
|
||||||
|
static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
{
|
||||||
|
+#ifndef _WIN32
|
||||||
|
int ret, pair1[2], pair2[2];
|
||||||
|
char c;
|
||||||
|
|
||||||
|
@@ -1524,8 +1524,8 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
close(pair1[1]);
|
||||||
|
close(pair2[0]);
|
||||||
|
close(pair2[1]);
|
||||||
|
+#endif
|
||||||
|
}
|
||||||
|
-#endif /* _WIN32 */
|
||||||
|
|
||||||
|
static void test_postcopy_recovery_common(MigrateCommon *args)
|
||||||
|
{
|
||||||
|
@@ -1565,7 +1565,6 @@ static void test_postcopy_recovery_common(MigrateCommon *args)
|
||||||
|
wait_for_postcopy_status(to, "postcopy-paused");
|
||||||
|
wait_for_postcopy_status(from, "postcopy-paused");
|
||||||
|
|
||||||
|
-#ifndef _WIN32
|
||||||
|
if (args->postcopy_recovery_test_fail) {
|
||||||
|
/*
|
||||||
|
* Test when a wrong socket specified for recover, and then the
|
||||||
|
@@ -1574,7 +1573,6 @@ static void test_postcopy_recovery_common(MigrateCommon *args)
|
||||||
|
postcopy_recover_fail(from, to);
|
||||||
|
/* continue with a good recovery */
|
||||||
|
}
|
||||||
|
-#endif /* _WIN32 */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a new socket to emulate a new channel that is different
|
||||||
|
@@ -1612,7 +1610,6 @@ static void test_postcopy_recovery_compress(void)
|
||||||
|
test_postcopy_recovery_common(&args);
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifndef _WIN32
|
||||||
|
static void test_postcopy_recovery_double_fail(void)
|
||||||
|
{
|
||||||
|
MigrateCommon args = {
|
||||||
|
@@ -1621,7 +1618,6 @@ static void test_postcopy_recovery_double_fail(void)
|
||||||
|
|
||||||
|
test_postcopy_recovery_common(&args);
|
||||||
|
}
|
||||||
|
-#endif /* _WIN32 */
|
||||||
|
|
||||||
|
#ifdef CONFIG_GNUTLS
|
||||||
|
static void test_postcopy_recovery_tls_psk(void)
|
||||||
|
@@ -3595,10 +3591,8 @@ int main(int argc, char **argv)
|
||||||
|
migration_test_add("/migration/postcopy/recovery/compress/plain",
|
||||||
|
test_postcopy_recovery_compress);
|
||||||
|
}
|
||||||
|
-#ifndef _WIN32
|
||||||
|
migration_test_add("/migration/postcopy/recovery/double-failures",
|
||||||
|
test_postcopy_recovery_double_fail);
|
||||||
|
-#endif /* _WIN32 */
|
||||||
|
if (is_x86) {
|
||||||
|
migration_test_add("/migration/postcopy/suspend",
|
||||||
|
test_postcopy_suspend);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
From f779d9def0b1f3446054842373b994c3f60cec41 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:45 -0400
|
||||||
|
Subject: [PATCH 10/11] tests/migration-tests: Verify postcopy-recover-setup
|
||||||
|
status
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [10/11] 67ebd3ec3714510483101c84253d0b71ddb5632a
|
||||||
|
|
||||||
|
Making sure the postcopy-recover-setup status is present in the postcopy
|
||||||
|
failure unit test. Note that it only applies to src QEMU not dest.
|
||||||
|
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit 8dbd24d3aa6d67b2d3576da016fb631fd1edfc2c)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
tests/qtest/migration-test.c | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
|
||||||
|
index 9f29f4e4f3..afe8270dd0 100644
|
||||||
|
--- a/tests/qtest/migration-test.c
|
||||||
|
+++ b/tests/qtest/migration-test.c
|
||||||
|
@@ -1509,6 +1509,12 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to)
|
||||||
|
migrate_recover(to, "fd:fd-mig");
|
||||||
|
migrate_qmp(from, "fd:fd-mig", "{'resume': true}");
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Source QEMU has an extra RECOVER_SETUP phase, dest doesn't have it.
|
||||||
|
+ * Make sure it appears along the way.
|
||||||
|
+ */
|
||||||
|
+ migration_event_wait(from, "postcopy-recover-setup");
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Make sure both QEMU instances will go into RECOVER stage, then test
|
||||||
|
* kicking them out using migrate-pause.
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,98 @@
|
|||||||
|
From 0f824a811ff30b2d8bd78eb97ee835598c6be65f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Wed, 19 Jun 2024 18:30:44 -0400
|
||||||
|
Subject: [PATCH 09/11] tests/migration-tests: migration_event_wait()
|
||||||
|
|
||||||
|
RH-Author: Juraj Marcin <None>
|
||||||
|
RH-MergeRequest: 419: migration: New postcopy state, and some cleanups [rhel-9.5.z]
|
||||||
|
RH-Jira: RHEL-63874
|
||||||
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [9/11] 417c600dfc6acbc125a82c1b56c9637041555c15
|
||||||
|
|
||||||
|
Introduce a small helper to wait for a migration event, generalized from
|
||||||
|
the incoming migration path. Make the helper easier to use by allowing it
|
||||||
|
to keep waiting until the expected event is received.
|
||||||
|
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
|
||||||
|
(cherry picked from commit d444e5673c223241bd2edbc207b02cc1b2114b71)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-63874
|
||||||
|
Y-JIRA: https://issues.redhat.com/browse/RHEL-38485
|
||||||
|
|
||||||
|
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
|
||||||
|
---
|
||||||
|
tests/qtest/migration-helpers.c | 31 ++++++++++++++++++++++---------
|
||||||
|
tests/qtest/migration-helpers.h | 2 ++
|
||||||
|
2 files changed, 24 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
|
||||||
|
index 50a6bc2569..31d83ab970 100644
|
||||||
|
--- a/tests/qtest/migration-helpers.c
|
||||||
|
+++ b/tests/qtest/migration-helpers.c
|
||||||
|
@@ -98,7 +98,7 @@ void migrate_set_capability(QTestState *who, const char *capability,
|
||||||
|
void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
- QDict *args, *rsp, *data;
|
||||||
|
+ QDict *args, *rsp;
|
||||||
|
|
||||||
|
va_start(ap, fmt);
|
||||||
|
args = qdict_from_vjsonf_nofail(fmt, ap);
|
||||||
|
@@ -121,14 +121,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
|
||||||
|
g_assert(qdict_haskey(rsp, "return"));
|
||||||
|
qobject_unref(rsp);
|
||||||
|
|
||||||
|
- rsp = qtest_qmp_eventwait_ref(to, "MIGRATION");
|
||||||
|
- g_assert(qdict_haskey(rsp, "data"));
|
||||||
|
-
|
||||||
|
- data = qdict_get_qdict(rsp, "data");
|
||||||
|
- g_assert(qdict_haskey(data, "status"));
|
||||||
|
- g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup");
|
||||||
|
-
|
||||||
|
- qobject_unref(rsp);
|
||||||
|
+ migration_event_wait(to, "setup");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -324,3 +317,23 @@ void migration_test_add(const char *path, void (*fn)(void))
|
||||||
|
qtest_add_data_func_full(path, test, migration_test_wrapper,
|
||||||
|
migration_test_destroy);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Wait for a "MIGRATION" event. This is what Libvirt uses to track
|
||||||
|
+ * migration status changes.
|
||||||
|
+ */
|
||||||
|
+void migration_event_wait(QTestState *s, const char *target)
|
||||||
|
+{
|
||||||
|
+ QDict *response, *data;
|
||||||
|
+ const char *status;
|
||||||
|
+ bool found;
|
||||||
|
+
|
||||||
|
+ do {
|
||||||
|
+ response = qtest_qmp_eventwait_ref(s, "MIGRATION");
|
||||||
|
+ data = qdict_get_qdict(response, "data");
|
||||||
|
+ g_assert(data);
|
||||||
|
+ status = qdict_get_str(data, "status");
|
||||||
|
+ found = (strcmp(status, target) == 0);
|
||||||
|
+ qobject_unref(response);
|
||||||
|
+ } while (!found);
|
||||||
|
+}
|
||||||
|
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
|
||||||
|
index 3bf7ded1b9..83f277c054 100644
|
||||||
|
--- a/tests/qtest/migration-helpers.h
|
||||||
|
+++ b/tests/qtest/migration-helpers.h
|
||||||
|
@@ -53,4 +53,6 @@ char *find_common_machine_version(const char *mtype, const char *var1,
|
||||||
|
char *resolve_machine_version(const char *alias, const char *var1,
|
||||||
|
const char *var2);
|
||||||
|
void migration_test_add(const char *path, void (*fn)(void));
|
||||||
|
+void migration_event_wait(QTestState *s, const char *target);
|
||||||
|
+
|
||||||
|
#endif /* MIGRATION_HELPERS_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,62 @@
|
|||||||
|
From 2052d94ffccde5d6eb5af8cca77aaf8bba650c68 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Prasad Pandit <pjp@fedoraproject.org>
|
||||||
|
Date: Thu, 7 Nov 2024 17:02:47 +0530
|
||||||
|
Subject: [PATCH] vhost: fail device start if iotlb update fails
|
||||||
|
|
||||||
|
RH-Author: Prasad Pandit <None>
|
||||||
|
RH-MergeRequest: 426: vhost: fail device start if iotlb update fails
|
||||||
|
RH-Jira: RHEL-73006
|
||||||
|
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||||
|
RH-Commit: [1/1] a96478385297d0559dd7dcaebd1834141bb5fb75
|
||||||
|
|
||||||
|
While starting a vhost device, updating iotlb entries
|
||||||
|
via 'vhost_device_iotlb_miss' may return an error.
|
||||||
|
|
||||||
|
qemu-kvm: vhost_device_iotlb_miss:
|
||||||
|
700871,700871: Fail to update device iotlb
|
||||||
|
|
||||||
|
Fail device start when such an error occurs.
|
||||||
|
|
||||||
|
Jira: https://issues.redhat.com/browse/RHEL-73006
|
||||||
|
Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
|
||||||
|
Message-Id: <20241107113247.46532-1-ppandit@redhat.com>
|
||||||
|
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||||
|
(cherry picked from commit 571bdc97b83646dfd3746ec56fb2f70bca55b9a2)
|
||||||
|
Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
|
||||||
|
---
|
||||||
|
hw/virtio/vhost.c | 13 ++++++++++++-
|
||||||
|
1 file changed, 12 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
|
||||||
|
index f50180e60e..da0f10c4dc 100644
|
||||||
|
--- a/hw/virtio/vhost.c
|
||||||
|
+++ b/hw/virtio/vhost.c
|
||||||
|
@@ -2074,11 +2074,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
|
||||||
|
* vhost-kernel code requires for this.*/
|
||||||
|
for (i = 0; i < hdev->nvqs; ++i) {
|
||||||
|
struct vhost_virtqueue *vq = hdev->vqs + i;
|
||||||
|
- vhost_device_iotlb_miss(hdev, vq->used_phys, true);
|
||||||
|
+ r = vhost_device_iotlb_miss(hdev, vq->used_phys, true);
|
||||||
|
+ if (r) {
|
||||||
|
+ goto fail_iotlb;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vhost_start_config_intr(hdev);
|
||||||
|
return 0;
|
||||||
|
+fail_iotlb:
|
||||||
|
+ if (vhost_dev_has_iommu(hdev) &&
|
||||||
|
+ hdev->vhost_ops->vhost_set_iotlb_callback) {
|
||||||
|
+ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
|
||||||
|
+ }
|
||||||
|
+ if (hdev->vhost_ops->vhost_dev_start) {
|
||||||
|
+ hdev->vhost_ops->vhost_dev_start(hdev, false);
|
||||||
|
+ }
|
||||||
|
fail_start:
|
||||||
|
if (vrings) {
|
||||||
|
vhost_dev_set_vring_enable(hdev, false);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
Loading…
Reference in new issue