forked from rpms/qemu-kvm
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
193 lines
7.6 KiB
193 lines
7.6 KiB
From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
|
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
|
Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack
|
|
capability
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Cédric Le Goater <clg@redhat.com>
|
|
RH-MergeRequest: 179: vfio: live migration support
|
|
RH-Bugzilla: 2192818
|
|
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s)
|
|
|
|
Bugzilla: https://bugzilla.redhat.com/2192818
|
|
|
|
commit 745c42912a04
|
|
Author: Avihai Horon <avihaih@nvidia.com>
|
|
Date: Wed Jun 21 14:12:01 2023 +0300
|
|
|
|
vfio/migration: Add support for switchover ack capability
|
|
|
|
Loading of a VFIO device's data can take a substantial amount of time as
|
|
the device may need to allocate resources, prepare internal data
|
|
structures, etc. This can increase migration downtime, especially for
|
|
VFIO devices with a lot of resources.
|
|
|
|
To solve this, VFIO migration uAPI defines "initial bytes" as part of
|
|
its precopy data stream. Initial bytes can be used in various ways to
|
|
improve VFIO migration performance. For example, they can be used to
|
|
transfer device metadata to pre-allocate resources in the destination.
|
|
However, for this to work we need to make sure that all initial bytes
|
|
are sent and loaded in the destination before the source VM is stopped.
|
|
|
|
Use migration switchover ack capability to make sure a VFIO device's
|
|
initial bytes are sent and loaded in the destination before the source
|
|
stops the VM and attempts to complete the migration.
|
|
This can significantly reduce migration downtime for some devices.
|
|
|
|
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
|
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
|
Tested-by: YangHang Liu <yanghliu@redhat.com>
|
|
Acked-by: Alex Williamson <alex.williamson@redhat.com>
|
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
|
|
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
|
---
|
|
docs/devel/vfio-migration.rst | 10 +++++++++
|
|
hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++-
|
|
include/hw/vfio/vfio-common.h | 1 +
|
|
3 files changed, 49 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
|
|
index e896b2a673..b433cb5bb2 100644
|
|
--- a/docs/devel/vfio-migration.rst
|
|
+++ b/docs/devel/vfio-migration.rst
|
|
@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy
|
|
support by reporting the VFIO_MIGRATION_PRE_COPY flag in the
|
|
VFIO_DEVICE_FEATURE_MIGRATION ioctl.
|
|
|
|
+When pre-copy is supported, it's possible to further reduce downtime by
|
|
+enabling "switchover-ack" migration capability.
|
|
+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream
|
|
+and recommends that the initial bytes are sent and loaded in the destination
|
|
+before stopping the source VM. Enabling this migration capability will
|
|
+guarantee that and can thus potentially reduce downtime even further.
|
|
+
|
|
Note that currently VFIO migration is supported only for a single device. This
|
|
is due to VFIO migration's lack of P2P support. However, P2P support is planned
|
|
to be added later on.
|
|
@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows:
|
|
* A ``save_live_iterate`` function that reads the VFIO device's data from the
|
|
vendor driver during iterative pre-copy phase.
|
|
|
|
+* A ``switchover_ack_needed`` function that checks if the VFIO device uses
|
|
+ "switchover-ack" migration capability when this capability is enabled.
|
|
+
|
|
* A ``save_state`` function to save the device config space if it is present.
|
|
|
|
* A ``save_live_complete_precopy`` function that sets the VFIO device in
|
|
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
|
index d8f6a22ae1..acbf0bb7ab 100644
|
|
--- a/hw/vfio/migration.c
|
|
+++ b/hw/vfio/migration.c
|
|
@@ -18,6 +18,8 @@
|
|
#include "sysemu/runstate.h"
|
|
#include "hw/vfio/vfio-common.h"
|
|
#include "migration/migration.h"
|
|
+#include "migration/options.h"
|
|
+#include "migration/savevm.h"
|
|
#include "migration/vmstate.h"
|
|
#include "migration/qemu-file.h"
|
|
#include "migration/register.h"
|
|
@@ -45,6 +47,7 @@
|
|
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
|
|
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
|
|
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
|
|
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
|
|
|
|
/*
|
|
* This is an arbitrary size based on migration of mlx5 devices, where typically
|
|
@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque)
|
|
migration->data_buffer = NULL;
|
|
migration->precopy_init_size = 0;
|
|
migration->precopy_dirty_size = 0;
|
|
+ migration->initial_data_sent = false;
|
|
vfio_migration_cleanup(vbasedev);
|
|
trace_vfio_save_cleanup(vbasedev->name);
|
|
}
|
|
@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
|
|
if (data_size < 0) {
|
|
return data_size;
|
|
}
|
|
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
|
|
|
vfio_update_estimated_pending_data(migration, data_size);
|
|
|
|
+ if (migrate_switchover_ack() && !migration->precopy_init_size &&
|
|
+ !migration->initial_data_sent) {
|
|
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
|
|
+ migration->initial_data_sent = true;
|
|
+ } else {
|
|
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
|
+ }
|
|
+
|
|
trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
|
|
migration->precopy_dirty_size);
|
|
|
|
@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
|
|
}
|
|
break;
|
|
}
|
|
+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
|
|
+ {
|
|
+ if (!vfio_precopy_supported(vbasedev) ||
|
|
+ !migrate_switchover_ack()) {
|
|
+ error_report("%s: Received INIT_DATA_SENT but switchover ack "
|
|
+ "is not used", vbasedev->name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ ret = qemu_loadvm_approve_switchover();
|
|
+ if (ret) {
|
|
+ error_report(
|
|
+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)",
|
|
+ vbasedev->name, ret, strerror(-ret));
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+ }
|
|
default:
|
|
error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
|
|
return -EINVAL;
|
|
@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
|
|
return ret;
|
|
}
|
|
|
|
+static bool vfio_switchover_ack_needed(void *opaque)
|
|
+{
|
|
+ VFIODevice *vbasedev = opaque;
|
|
+
|
|
+ return vfio_precopy_supported(vbasedev);
|
|
+}
|
|
+
|
|
static const SaveVMHandlers savevm_vfio_handlers = {
|
|
.save_setup = vfio_save_setup,
|
|
.save_cleanup = vfio_save_cleanup,
|
|
@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
|
|
.load_setup = vfio_load_setup,
|
|
.load_cleanup = vfio_load_cleanup,
|
|
.load_state = vfio_load_state,
|
|
+ .switchover_ack_needed = vfio_switchover_ack_needed,
|
|
};
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
|
index 1db901c194..3dc5f2104c 100644
|
|
--- a/include/hw/vfio/vfio-common.h
|
|
+++ b/include/hw/vfio/vfio-common.h
|
|
@@ -69,6 +69,7 @@ typedef struct VFIOMigration {
|
|
uint64_t mig_flags;
|
|
uint64_t precopy_init_size;
|
|
uint64_t precopy_dirty_size;
|
|
+ bool initial_data_sent;
|
|
} VFIOMigration;
|
|
|
|
typedef struct VFIOAddressSpace {
|
|
--
|
|
2.39.3
|
|
|