parent
13ad956357
commit
fe9b17a593
@ -0,0 +1,50 @@
|
|||||||
|
From 111d70a5bdc3ee0dde0a6def9e0c75ed20b4f093 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Tue, 17 Sep 2024 12:38:33 -0400
|
||||||
|
Subject: [PATCH 6/9] KVM: Define KVM_MEMSLOTS_NR_MAX_DEFAULT
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [5/7] e4c2a2c2f3a809c8efb709521c7a94ba0627c69b (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
Make the default max nr_slots a macro, it's only used when KVM reports
|
||||||
|
nothing.
|
||||||
|
|
||||||
|
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240917163835.194664-3-peterx@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit b34a908c8f24eedb0a8e5ff486b059b58fd793f4)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index 38393bc86b..87db0f9494 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -71,6 +71,8 @@
|
||||||
|
|
||||||
|
/* Default num of memslots to be allocated when VM starts */
|
||||||
|
#define KVM_MEMSLOTS_NR_ALLOC_DEFAULT 16
|
||||||
|
+/* Default max allowed memslots if kernel reported nothing */
|
||||||
|
+#define KVM_MEMSLOTS_NR_MAX_DEFAULT 32
|
||||||
|
|
||||||
|
struct KVMParkedVcpu {
|
||||||
|
unsigned long vcpu_id;
|
||||||
|
@@ -2617,7 +2619,7 @@ static int kvm_init(MachineState *ms)
|
||||||
|
|
||||||
|
/* If unspecified, use the default value */
|
||||||
|
if (!s->nr_slots) {
|
||||||
|
- s->nr_slots = 32;
|
||||||
|
+ s->nr_slots_max = KVM_MEMSLOTS_NR_MAX_DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
s->nr_as = kvm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,251 @@
|
|||||||
|
From c77a30265b8d0db43174b040ea82103f8fdb9911 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Tue, 17 Sep 2024 12:38:32 -0400
|
||||||
|
Subject: [PATCH 5/9] KVM: Dynamic sized kvm memslots array
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [4/7] 46d4abec352a92112e593ea61b7cbf5ce5f94cdc (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
Zhiyi reported an infinite loop issue in VFIO use case. The cause of that
|
||||||
|
was a separate discussion, however during that I found a regression of
|
||||||
|
dirty sync slowness when profiling.
|
||||||
|
|
||||||
|
Each KVMMemoryListener maintains an array of kvm memslots. Currently it's
|
||||||
|
statically allocated to be the max supported by the kernel. However after
|
||||||
|
Linux commit 4fc096a99e ("KVM: Raise the maximum number of user memslots"),
|
||||||
|
the max supported memslots reported now grows to some number large enough
|
||||||
|
so that it may not be wise to always statically allocate with the max
|
||||||
|
reported.
|
||||||
|
|
||||||
|
What's worse, QEMU kvm code still walks all the allocated memslots entries
|
||||||
|
to do any form of lookups. It can drastically slow down all memslot
|
||||||
|
operations because each of such loop can run over 32K times on the new
|
||||||
|
kernels.
|
||||||
|
|
||||||
|
Fix this issue by allocating the memslots dynamically.
|
||||||
|
|
||||||
|
Here the initial size was set to 16 because it should cover the basic VM
|
||||||
|
usages, so that the hope is the majority VM use case may not even need to
|
||||||
|
grow at all (e.g. if one starts a VM with ./qemu-system-x86_64 by default
|
||||||
|
it'll consume 9 memslots), however not too large to waste memory.
|
||||||
|
|
||||||
|
There may be even better ways to address this, but so far this is the
|
||||||
|
simplest and should be already better even than before we grow the max
|
||||||
|
supported memslots. For example, in the case of above issue when VFIO was
|
||||||
|
attached on a 32GB system, there are only ~10 memslots used. So it could
|
||||||
|
be good enough as of now.
|
||||||
|
|
||||||
|
In the above VFIO context, measurement shows that the precopy dirty sync
|
||||||
|
shrank from ~86ms to ~3ms after this patch was applied. It should also apply
|
||||||
|
to any KVM enabled VM even without VFIO.
|
||||||
|
|
||||||
|
NOTE: we don't have a FIXES tag for this patch because there's no real
|
||||||
|
commit that regressed this in QEMU. Such behavior existed for a long time,
|
||||||
|
but only started to be a problem when the kernel reports very large
|
||||||
|
nr_slots_max value. However that's pretty common now (the kernel change
|
||||||
|
was merged in 2021) so we attached cc:stable because we'll want this change
|
||||||
|
to be backported to stable branches.
|
||||||
|
|
||||||
|
Cc: qemu-stable <qemu-stable@nongnu.org>
|
||||||
|
Reported-by: Zhiyi Guo <zhguo@redhat.com>
|
||||||
|
Tested-by: Zhiyi Guo <zhguo@redhat.com>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Acked-by: David Hildenbrand <david@redhat.com>
|
||||||
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
||||||
|
Link: https://lore.kernel.org/r/20240917163835.194664-2-peterx@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit 5504a8126115d173687b37e657312a8ffe29fc0c)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 87 +++++++++++++++++++++++++++++++++-------
|
||||||
|
accel/kvm/trace-events | 1 +
|
||||||
|
include/sysemu/kvm_int.h | 1 +
|
||||||
|
3 files changed, 74 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index 8187ad3964..38393bc86b 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -69,6 +69,9 @@
|
||||||
|
#define KVM_GUESTDBG_BLOCKIRQ 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+/* Default num of memslots to be allocated when VM starts */
|
||||||
|
+#define KVM_MEMSLOTS_NR_ALLOC_DEFAULT 16
|
||||||
|
+
|
||||||
|
struct KVMParkedVcpu {
|
||||||
|
unsigned long vcpu_id;
|
||||||
|
int kvm_fd;
|
||||||
|
@@ -165,6 +168,57 @@ void kvm_resample_fd_notify(int gsi)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * kvm_slots_grow(): Grow the slots[] array in the KVMMemoryListener
|
||||||
|
+ *
|
||||||
|
+ * @kml: The KVMMemoryListener* to grow the slots[] array
|
||||||
|
+ * @nr_slots_new: The new size of slots[] array
|
||||||
|
+ *
|
||||||
|
+ * Returns: True if the array grows larger, false otherwise.
|
||||||
|
+ */
|
||||||
|
+static bool kvm_slots_grow(KVMMemoryListener *kml, unsigned int nr_slots_new)
|
||||||
|
+{
|
||||||
|
+ unsigned int i, cur = kml->nr_slots_allocated;
|
||||||
|
+ KVMSlot *slots;
|
||||||
|
+
|
||||||
|
+ if (nr_slots_new > kvm_state->nr_slots) {
|
||||||
|
+ nr_slots_new = kvm_state->nr_slots;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (cur >= nr_slots_new) {
|
||||||
|
+ /* Big enough, no need to grow, or we reached max */
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (cur == 0) {
|
||||||
|
+ slots = g_new0(KVMSlot, nr_slots_new);
|
||||||
|
+ } else {
|
||||||
|
+ assert(kml->slots);
|
||||||
|
+ slots = g_renew(KVMSlot, kml->slots, nr_slots_new);
|
||||||
|
+ /*
|
||||||
|
+ * g_renew() doesn't initialize extended buffers, however kvm
|
||||||
|
+ * memslots require fields to be zero-initialized. E.g. pointers,
|
||||||
|
+ * memory_size field, etc.
|
||||||
|
+ */
|
||||||
|
+ memset(&slots[cur], 0x0, sizeof(slots[0]) * (nr_slots_new - cur));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for (i = cur; i < nr_slots_new; i++) {
|
||||||
|
+ slots[i].slot = i;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ kml->slots = slots;
|
||||||
|
+ kml->nr_slots_allocated = nr_slots_new;
|
||||||
|
+ trace_kvm_slots_grow(cur, nr_slots_new);
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool kvm_slots_double(KVMMemoryListener *kml)
|
||||||
|
+{
|
||||||
|
+ return kvm_slots_grow(kml, kml->nr_slots_allocated * 2);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
unsigned int kvm_get_max_memslots(void)
|
||||||
|
{
|
||||||
|
KVMState *s = KVM_STATE(current_accel());
|
||||||
|
@@ -193,15 +247,26 @@ unsigned int kvm_get_free_memslots(void)
|
||||||
|
/* Called with KVMMemoryListener.slots_lock held */
|
||||||
|
static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
|
||||||
|
{
|
||||||
|
- KVMState *s = kvm_state;
|
||||||
|
+ unsigned int n;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
+ for (i = 0; i < kml->nr_slots_allocated; i++) {
|
||||||
|
if (kml->slots[i].memory_size == 0) {
|
||||||
|
return &kml->slots[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * If no free slots, try to grow first by doubling. Cache the old size
|
||||||
|
+ * here to avoid another round of search: if the grow succeeded, it
|
||||||
|
+ * means slots[] now must have the existing "n" slots occupied,
|
||||||
|
+ * followed by one or more free slots starting from slots[n].
|
||||||
|
+ */
|
||||||
|
+ n = kml->nr_slots_allocated;
|
||||||
|
+ if (kvm_slots_double(kml)) {
|
||||||
|
+ return &kml->slots[n];
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -222,10 +287,9 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
|
||||||
|
hwaddr start_addr,
|
||||||
|
hwaddr size)
|
||||||
|
{
|
||||||
|
- KVMState *s = kvm_state;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
+ for (i = 0; i < kml->nr_slots_allocated; i++) {
|
||||||
|
KVMSlot *mem = &kml->slots[i];
|
||||||
|
|
||||||
|
if (start_addr == mem->start_addr && size == mem->memory_size) {
|
||||||
|
@@ -267,7 +331,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
|
||||||
|
int i, ret = 0;
|
||||||
|
|
||||||
|
kvm_slots_lock();
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
+ for (i = 0; i < kml->nr_slots_allocated; i++) {
|
||||||
|
KVMSlot *mem = &kml->slots[i];
|
||||||
|
|
||||||
|
if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
|
||||||
|
@@ -1071,7 +1135,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
|
||||||
|
|
||||||
|
kvm_slots_lock();
|
||||||
|
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
+ for (i = 0; i < kml->nr_slots_allocated; i++) {
|
||||||
|
mem = &kml->slots[i];
|
||||||
|
/* Discard slots that are empty or do not overlap the section */
|
||||||
|
if (!mem->memory_size ||
|
||||||
|
@@ -1719,12 +1783,8 @@ static void kvm_log_sync_global(MemoryListener *l, bool last_stage)
|
||||||
|
/* Flush all kernel dirty addresses into KVMSlot dirty bitmap */
|
||||||
|
kvm_dirty_ring_flush();
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * TODO: make this faster when nr_slots is big while there are
|
||||||
|
- * only a few used slots (small VMs).
|
||||||
|
- */
|
||||||
|
kvm_slots_lock();
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
+ for (i = 0; i < kml->nr_slots_allocated; i++) {
|
||||||
|
mem = &kml->slots[i];
|
||||||
|
if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||||
|
kvm_slot_sync_dirty_pages(mem);
|
||||||
|
@@ -1839,12 +1899,9 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
- kml->slots = g_new0(KVMSlot, s->nr_slots);
|
||||||
|
kml->as_id = as_id;
|
||||||
|
|
||||||
|
- for (i = 0; i < s->nr_slots; i++) {
|
||||||
|
- kml->slots[i].slot = i;
|
||||||
|
- }
|
||||||
|
+ kvm_slots_grow(kml, KVM_MEMSLOTS_NR_ALLOC_DEFAULT);
|
||||||
|
|
||||||
|
QSIMPLEQ_INIT(&kml->transaction_add);
|
||||||
|
QSIMPLEQ_INIT(&kml->transaction_del);
|
||||||
|
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
|
||||||
|
index 37626c1ac5..ad2ae6fca5 100644
|
||||||
|
--- a/accel/kvm/trace-events
|
||||||
|
+++ b/accel/kvm/trace-events
|
||||||
|
@@ -36,3 +36,4 @@ kvm_io_window_exit(void) ""
|
||||||
|
kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
|
||||||
|
kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
|
||||||
|
kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64
|
||||||
|
+kvm_slots_grow(unsigned int old, unsigned int new) "%u -> %u"
|
||||||
|
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||||
|
index 1d8fb1473b..48e496b3d4 100644
|
||||||
|
--- a/include/sysemu/kvm_int.h
|
||||||
|
+++ b/include/sysemu/kvm_int.h
|
||||||
|
@@ -46,6 +46,7 @@ typedef struct KVMMemoryListener {
|
||||||
|
MemoryListener listener;
|
||||||
|
KVMSlot *slots;
|
||||||
|
unsigned int nr_used_slots;
|
||||||
|
+ unsigned int nr_slots_allocated;
|
||||||
|
int as_id;
|
||||||
|
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
|
||||||
|
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,73 @@
|
|||||||
|
From b1d082cfad79245ac0ffed45f723092388d1cf45 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Tue, 17 Sep 2024 12:38:34 -0400
|
||||||
|
Subject: [PATCH 7/9] KVM: Rename KVMMemoryListener.nr_used_slots to
|
||||||
|
nr_slots_used
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [6/7] ed173123ee23edcf62a6c1940ca74cdfd6b545e9 (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
This makes all nr_slots counters follow the same naming scheme.
|
||||||
|
|
||||||
|
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240917163835.194664-4-peterx@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit dbdc00ba5b136bba80d850f61cc79a9cafaae1cd)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 6 +++---
|
||||||
|
include/sysemu/kvm_int.h | 2 +-
|
||||||
|
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index 87db0f9494..e99aaba486 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -239,7 +239,7 @@ unsigned int kvm_get_free_memslots(void)
|
||||||
|
if (!s->as[i].ml) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
- used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
|
||||||
|
+ used_slots = MAX(used_slots, s->as[i].ml->nr_slots_used);
|
||||||
|
}
|
||||||
|
kvm_slots_unlock();
|
||||||
|
|
||||||
|
@@ -1516,7 +1516,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||||
|
}
|
||||||
|
start_addr += slot_size;
|
||||||
|
size -= slot_size;
|
||||||
|
- kml->nr_used_slots--;
|
||||||
|
+ kml->nr_slots_used--;
|
||||||
|
} while (size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -1555,7 +1555,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||||
|
ram_start_offset += slot_size;
|
||||||
|
ram += slot_size;
|
||||||
|
size -= slot_size;
|
||||||
|
- kml->nr_used_slots++;
|
||||||
|
+ kml->nr_slots_used++;
|
||||||
|
} while (size);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||||
|
index 48e496b3d4..b705dfc9b4 100644
|
||||||
|
--- a/include/sysemu/kvm_int.h
|
||||||
|
+++ b/include/sysemu/kvm_int.h
|
||||||
|
@@ -45,7 +45,7 @@ typedef struct KVMMemoryUpdate {
|
||||||
|
typedef struct KVMMemoryListener {
|
||||||
|
MemoryListener listener;
|
||||||
|
KVMSlot *slots;
|
||||||
|
- unsigned int nr_used_slots;
|
||||||
|
+ unsigned int nr_slots_used;
|
||||||
|
unsigned int nr_slots_allocated;
|
||||||
|
int as_id;
|
||||||
|
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,90 @@
|
|||||||
|
From 891fb13363d168760cd21d0c57368e1a413cad27 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peter Xu <peterx@redhat.com>
|
||||||
|
Date: Tue, 17 Sep 2024 12:38:35 -0400
|
||||||
|
Subject: [PATCH 8/9] KVM: Rename KVMState->nr_slots to nr_slots_max
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [7/7] 7a1b28f04ee6a2c80b07db241fc88cb40f54e376 (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
This value used to reflect the maximum supported memslots from KVM kernel.
|
||||||
|
Rename it to be clearer.
|
||||||
|
|
||||||
|
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240917163835.194664-5-peterx@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit 943c742868c739c0b14fd996bad3adf744156fec)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 12 ++++++------
|
||||||
|
include/sysemu/kvm_int.h | 4 ++--
|
||||||
|
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index e99aaba486..dc6253895d 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -183,8 +183,8 @@ static bool kvm_slots_grow(KVMMemoryListener *kml, unsigned int nr_slots_new)
|
||||||
|
unsigned int i, cur = kml->nr_slots_allocated;
|
||||||
|
KVMSlot *slots;
|
||||||
|
|
||||||
|
- if (nr_slots_new > kvm_state->nr_slots) {
|
||||||
|
- nr_slots_new = kvm_state->nr_slots;
|
||||||
|
+ if (nr_slots_new > kvm_state->nr_slots_max) {
|
||||||
|
+ nr_slots_new = kvm_state->nr_slots_max;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur >= nr_slots_new) {
|
||||||
|
@@ -225,7 +225,7 @@ unsigned int kvm_get_max_memslots(void)
|
||||||
|
{
|
||||||
|
KVMState *s = KVM_STATE(current_accel());
|
||||||
|
|
||||||
|
- return s->nr_slots;
|
||||||
|
+ return s->nr_slots_max;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int kvm_get_free_memslots(void)
|
||||||
|
@@ -243,7 +243,7 @@ unsigned int kvm_get_free_memslots(void)
|
||||||
|
}
|
||||||
|
kvm_slots_unlock();
|
||||||
|
|
||||||
|
- return s->nr_slots - used_slots;
|
||||||
|
+ return s->nr_slots_max - used_slots;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Called with KVMMemoryListener.slots_lock held */
|
||||||
|
@@ -2615,10 +2615,10 @@ static int kvm_init(MachineState *ms)
|
||||||
|
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
||||||
|
|
||||||
|
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
|
||||||
|
- s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
||||||
|
+ s->nr_slots_max = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
||||||
|
|
||||||
|
/* If unspecified, use the default value */
|
||||||
|
- if (!s->nr_slots) {
|
||||||
|
+ if (!s->nr_slots_max) {
|
||||||
|
s->nr_slots_max = KVM_MEMSLOTS_NR_MAX_DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||||
|
index b705dfc9b4..2c57194b6b 100644
|
||||||
|
--- a/include/sysemu/kvm_int.h
|
||||||
|
+++ b/include/sysemu/kvm_int.h
|
||||||
|
@@ -103,8 +103,8 @@ struct KVMDirtyRingReaper {
|
||||||
|
struct KVMState
|
||||||
|
{
|
||||||
|
AccelState parent_obj;
|
||||||
|
-
|
||||||
|
- int nr_slots;
|
||||||
|
+ /* Max number of KVM slots supported */
|
||||||
|
+ int nr_slots_max;
|
||||||
|
int fd;
|
||||||
|
int vmfd;
|
||||||
|
int coalesced_mmio;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,144 @@
|
|||||||
|
From 00a2dbf483a077bb31b1c9f70cced36319d22628 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Date: Thu, 12 Sep 2024 11:48:38 +0530
|
||||||
|
Subject: [PATCH 4/9] accel/kvm: refactor dirty ring setup
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [3/7] 94f345d1e7ad6437dd2ce67ca7cad224c67aa48f (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
Refactor setting up of dirty ring code in kvm_init() so that it can be
|
||||||
|
reused in the future patchsets.
|
||||||
|
|
||||||
|
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240912061838.4501-1-anisinha@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit 28ed7f9761eb273e7dedcfdc0507d158106d0451)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 88 +++++++++++++++++++++++++--------------------
|
||||||
|
1 file changed, 50 insertions(+), 38 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index d86d1b515a..8187ad3964 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -2439,6 +2439,55 @@ static int find_kvm_machine_type(MachineState *ms)
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int kvm_setup_dirty_ring(KVMState *s)
|
||||||
|
+{
|
||||||
|
+ uint64_t dirty_log_manual_caps;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Enable KVM dirty ring if supported, otherwise fall back to
|
||||||
|
+ * dirty logging mode
|
||||||
|
+ */
|
||||||
|
+ ret = kvm_dirty_ring_init(s);
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is
|
||||||
|
+ * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no
|
||||||
|
+ * page is wr-protected initially, which is against how kvm dirty ring is
|
||||||
|
+ * usage - kvm dirty ring requires all pages are wr-protected at the very
|
||||||
|
+ * beginning. Enabling this feature for dirty ring causes data corruption.
|
||||||
|
+ *
|
||||||
|
+ * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log,
|
||||||
|
+ * we may expect a higher stall time when starting the migration. In the
|
||||||
|
+ * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too:
|
||||||
|
+ * instead of clearing dirty bit, it can be a way to explicitly wr-protect
|
||||||
|
+ * guest pages.
|
||||||
|
+ */
|
||||||
|
+ if (!s->kvm_dirty_ring_size) {
|
||||||
|
+ dirty_log_manual_caps =
|
||||||
|
+ kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
|
||||||
|
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
|
||||||
|
+ KVM_DIRTY_LOG_INITIALLY_SET);
|
||||||
|
+ s->manual_dirty_log_protect = dirty_log_manual_caps;
|
||||||
|
+ if (dirty_log_manual_caps) {
|
||||||
|
+ ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0,
|
||||||
|
+ dirty_log_manual_caps);
|
||||||
|
+ if (ret) {
|
||||||
|
+ warn_report("Trying to enable capability %"PRIu64" of "
|
||||||
|
+ "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. "
|
||||||
|
+ "Falling back to the legacy mode. ",
|
||||||
|
+ dirty_log_manual_caps);
|
||||||
|
+ s->manual_dirty_log_protect = 0;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int kvm_init(MachineState *ms)
|
||||||
|
{
|
||||||
|
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
|
@@ -2458,7 +2507,6 @@ static int kvm_init(MachineState *ms)
|
||||||
|
const KVMCapabilityInfo *missing_cap;
|
||||||
|
int ret;
|
||||||
|
int type;
|
||||||
|
- uint64_t dirty_log_manual_caps;
|
||||||
|
|
||||||
|
qemu_mutex_init(&kml_slots_lock);
|
||||||
|
|
||||||
|
@@ -2570,47 +2618,11 @@ static int kvm_init(MachineState *ms)
|
||||||
|
s->coalesced_pio = s->coalesced_mmio &&
|
||||||
|
kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * Enable KVM dirty ring if supported, otherwise fall back to
|
||||||
|
- * dirty logging mode
|
||||||
|
- */
|
||||||
|
- ret = kvm_dirty_ring_init(s);
|
||||||
|
+ ret = kvm_setup_dirty_ring(s);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is
|
||||||
|
- * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no
|
||||||
|
- * page is wr-protected initially, which is against how kvm dirty ring is
|
||||||
|
- * usage - kvm dirty ring requires all pages are wr-protected at the very
|
||||||
|
- * beginning. Enabling this feature for dirty ring causes data corruption.
|
||||||
|
- *
|
||||||
|
- * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log,
|
||||||
|
- * we may expect a higher stall time when starting the migration. In the
|
||||||
|
- * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too:
|
||||||
|
- * instead of clearing dirty bit, it can be a way to explicitly wr-protect
|
||||||
|
- * guest pages.
|
||||||
|
- */
|
||||||
|
- if (!s->kvm_dirty_ring_size) {
|
||||||
|
- dirty_log_manual_caps =
|
||||||
|
- kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
|
||||||
|
- dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
|
||||||
|
- KVM_DIRTY_LOG_INITIALLY_SET);
|
||||||
|
- s->manual_dirty_log_protect = dirty_log_manual_caps;
|
||||||
|
- if (dirty_log_manual_caps) {
|
||||||
|
- ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0,
|
||||||
|
- dirty_log_manual_caps);
|
||||||
|
- if (ret) {
|
||||||
|
- warn_report("Trying to enable capability %"PRIu64" of "
|
||||||
|
- "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. "
|
||||||
|
- "Falling back to the legacy mode. ",
|
||||||
|
- dirty_log_manual_caps);
|
||||||
|
- s->manual_dirty_log_protect = 0;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
#ifdef KVM_CAP_VCPU_EVENTS
|
||||||
|
s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
|
||||||
|
#endif
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,144 @@
|
|||||||
|
From 67180363bdc1898462f90e16c1909db7331cc5e2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Date: Thu, 8 Aug 2024 17:08:38 +0530
|
||||||
|
Subject: [PATCH 3/9] kvm: refactor core virtual machine creation into its own
|
||||||
|
function
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [2/7] a783111d9a2ef6590103543f1bd103bf90052872 (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
Refactoring the core logic around KVM_CREATE_VM into its own separate function
|
||||||
|
so that it can be called from other functions in subsequent patches. There is
|
||||||
|
no functional change in this patch.
|
||||||
|
|
||||||
|
CC: pbonzini@redhat.com
|
||||||
|
CC: zhao1.liu@intel.com
|
||||||
|
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240808113838.1697366-1-anisinha@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit 67388078da1cf6dac89e5a7c748cca3444d49690)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 89 ++++++++++++++++++++++++++++-----------------
|
||||||
|
1 file changed, 56 insertions(+), 33 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index 7432a54f39..d86d1b515a 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -2385,6 +2385,60 @@ uint32_t kvm_dirty_ring_size(void)
|
||||||
|
return kvm_state->kvm_dirty_ring_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int do_kvm_create_vm(MachineState *ms, int type)
|
||||||
|
+{
|
||||||
|
+ KVMState *s;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ s = KVM_STATE(ms->accelerator);
|
||||||
|
+
|
||||||
|
+ do {
|
||||||
|
+ ret = kvm_ioctl(s, KVM_CREATE_VM, type);
|
||||||
|
+ } while (ret == -EINTR);
|
||||||
|
+
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ error_report("ioctl(KVM_CREATE_VM) failed: %s", strerror(-ret));
|
||||||
|
+
|
||||||
|
+#ifdef TARGET_S390X
|
||||||
|
+ if (ret == -EINVAL) {
|
||||||
|
+ error_printf("Host kernel setup problem detected."
|
||||||
|
+ " Please verify:\n");
|
||||||
|
+ error_printf("- for kernels supporting the"
|
||||||
|
+ " switch_amode or user_mode parameters, whether");
|
||||||
|
+ error_printf(" user space is running in primary address space\n");
|
||||||
|
+ error_printf("- for kernels supporting the vm.allocate_pgste"
|
||||||
|
+ " sysctl, whether it is enabled\n");
|
||||||
|
+ }
|
||||||
|
+#elif defined(TARGET_PPC)
|
||||||
|
+ if (ret == -EINVAL) {
|
||||||
|
+ error_printf("PPC KVM module is not loaded. Try modprobe kvm_%s.\n",
|
||||||
|
+ (type == 2) ? "pr" : "hv");
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int find_kvm_machine_type(MachineState *ms)
|
||||||
|
+{
|
||||||
|
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
|
+ int type;
|
||||||
|
+
|
||||||
|
+ if (object_property_find(OBJECT(current_machine), "kvm-type")) {
|
||||||
|
+ g_autofree char *kvm_type;
|
||||||
|
+ kvm_type = object_property_get_str(OBJECT(current_machine),
|
||||||
|
+ "kvm-type",
|
||||||
|
+ &error_abort);
|
||||||
|
+ type = mc->kvm_type(ms, kvm_type);
|
||||||
|
+ } else if (mc->kvm_type) {
|
||||||
|
+ type = mc->kvm_type(ms, NULL);
|
||||||
|
+ } else {
|
||||||
|
+ type = kvm_arch_get_default_type(ms);
|
||||||
|
+ }
|
||||||
|
+ return type;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int kvm_init(MachineState *ms)
|
||||||
|
{
|
||||||
|
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
|
@@ -2467,45 +2521,14 @@ static int kvm_init(MachineState *ms)
|
||||||
|
}
|
||||||
|
s->as = g_new0(struct KVMAs, s->nr_as);
|
||||||
|
|
||||||
|
- if (object_property_find(OBJECT(current_machine), "kvm-type")) {
|
||||||
|
- g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine),
|
||||||
|
- "kvm-type",
|
||||||
|
- &error_abort);
|
||||||
|
- type = mc->kvm_type(ms, kvm_type);
|
||||||
|
- } else if (mc->kvm_type) {
|
||||||
|
- type = mc->kvm_type(ms, NULL);
|
||||||
|
- } else {
|
||||||
|
- type = kvm_arch_get_default_type(ms);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
+ type = find_kvm_machine_type(ms);
|
||||||
|
if (type < 0) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
- do {
|
||||||
|
- ret = kvm_ioctl(s, KVM_CREATE_VM, type);
|
||||||
|
- } while (ret == -EINTR);
|
||||||
|
-
|
||||||
|
+ ret = do_kvm_create_vm(ms, type);
|
||||||
|
if (ret < 0) {
|
||||||
|
- error_report("ioctl(KVM_CREATE_VM) failed: %s", strerror(-ret));
|
||||||
|
-
|
||||||
|
-#ifdef TARGET_S390X
|
||||||
|
- if (ret == -EINVAL) {
|
||||||
|
- error_printf("Host kernel setup problem detected."
|
||||||
|
- " Please verify:\n");
|
||||||
|
- error_printf("- for kernels supporting the"
|
||||||
|
- " switch_amode or user_mode parameters, whether");
|
||||||
|
- error_printf(" user space is running in primary address space\n");
|
||||||
|
- error_printf("- for kernels supporting the vm.allocate_pgste"
|
||||||
|
- " sysctl, whether it is enabled\n");
|
||||||
|
- }
|
||||||
|
-#elif defined(TARGET_PPC)
|
||||||
|
- if (ret == -EINVAL) {
|
||||||
|
- error_printf("PPC KVM module is not loaded. Try modprobe kvm_%s.\n",
|
||||||
|
- (type == 2) ? "pr" : "hv");
|
||||||
|
- }
|
||||||
|
-#endif
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,132 @@
|
|||||||
|
From 522e19dd84eb5c4d88b3b70193ee104f67a5b89d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Date: Wed, 28 Aug 2024 18:15:39 +0530
|
||||||
|
Subject: [PATCH 2/9] kvm: replace fprintf with error_report()/printf() in
|
||||||
|
kvm_init()
|
||||||
|
|
||||||
|
RH-Author: Peter Xu <peterx@redhat.com>
|
||||||
|
RH-MergeRequest: 285: KVM: Dynamic sized kvm memslots array
|
||||||
|
RH-Jira: RHEL-57685
|
||||||
|
RH-Acked-by: Juraj Marcin <None>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Commit: [1/7] 6c1230a6d5033d928817df9458938a675058e995 (peterx/qemu-kvm)
|
||||||
|
|
||||||
|
error_report() is more appropriate for error situations. Replace fprintf with
|
||||||
|
error_report() and error_printf() as appropriate. Some improvement in error
|
||||||
|
reporting also happens as a part of this change. For example:
|
||||||
|
|
||||||
|
From:
|
||||||
|
$ ./qemu-system-x86_64 --accel kvm
|
||||||
|
Could not access KVM kernel module: No such file or directory
|
||||||
|
|
||||||
|
To:
|
||||||
|
$ ./qemu-system-x86_64 --accel kvm
|
||||||
|
qemu-system-x86_64: --accel kvm: Could not access KVM kernel module: No such file or directory
|
||||||
|
|
||||||
|
CC: qemu-trivial@nongnu.org
|
||||||
|
CC: zhao1.liu@intel.com
|
||||||
|
CC: armbru@redhat.com
|
||||||
|
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||||
|
Reviewed-by: Markus Armbruster <armbru@redhat.com>
|
||||||
|
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20240828124539.62672-1-anisinha@redhat.com
|
||||||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
(cherry picked from commit 804dfbe3ef5e950328b162ae85741be2e228544f)
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
---
|
||||||
|
accel/kvm/kvm-all.c | 40 ++++++++++++++++++----------------------
|
||||||
|
1 file changed, 18 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||||
|
index c7f1cc64b6..7432a54f39 100644
|
||||||
|
--- a/accel/kvm/kvm-all.c
|
||||||
|
+++ b/accel/kvm/kvm-all.c
|
||||||
|
@@ -2427,7 +2427,7 @@ static int kvm_init(MachineState *ms)
|
||||||
|
QLIST_INIT(&s->kvm_parked_vcpus);
|
||||||
|
s->fd = qemu_open_old(s->device ?: "/dev/kvm", O_RDWR);
|
||||||
|
if (s->fd == -1) {
|
||||||
|
- fprintf(stderr, "Could not access KVM kernel module: %m\n");
|
||||||
|
+ error_report("Could not access KVM kernel module: %m");
|
||||||
|
ret = -errno;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
@@ -2437,13 +2437,13 @@ static int kvm_init(MachineState *ms)
|
||||||
|
if (ret >= 0) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
}
|
||||||
|
- fprintf(stderr, "kvm version too old\n");
|
||||||
|
+ error_report("kvm version too old");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret > KVM_API_VERSION) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
- fprintf(stderr, "kvm version not supported\n");
|
||||||
|
+ error_report("kvm version not supported");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2488,26 +2488,22 @@ static int kvm_init(MachineState *ms)
|
||||||
|
} while (ret == -EINTR);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
- fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
|
||||||
|
- strerror(-ret));
|
||||||
|
+ error_report("ioctl(KVM_CREATE_VM) failed: %s", strerror(-ret));
|
||||||
|
|
||||||
|
#ifdef TARGET_S390X
|
||||||
|
if (ret == -EINVAL) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- "Host kernel setup problem detected. Please verify:\n");
|
||||||
|
- fprintf(stderr, "- for kernels supporting the switch_amode or"
|
||||||
|
- " user_mode parameters, whether\n");
|
||||||
|
- fprintf(stderr,
|
||||||
|
- " user space is running in primary address space\n");
|
||||||
|
- fprintf(stderr,
|
||||||
|
- "- for kernels supporting the vm.allocate_pgste sysctl, "
|
||||||
|
- "whether it is enabled\n");
|
||||||
|
+ error_printf("Host kernel setup problem detected."
|
||||||
|
+ " Please verify:\n");
|
||||||
|
+ error_printf("- for kernels supporting the"
|
||||||
|
+ " switch_amode or user_mode parameters, whether");
|
||||||
|
+ error_printf(" user space is running in primary address space\n");
|
||||||
|
+ error_printf("- for kernels supporting the vm.allocate_pgste"
|
||||||
|
+ " sysctl, whether it is enabled\n");
|
||||||
|
}
|
||||||
|
#elif defined(TARGET_PPC)
|
||||||
|
if (ret == -EINVAL) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- "PPC KVM module is not loaded. Try modprobe kvm_%s.\n",
|
||||||
|
- (type == 2) ? "pr" : "hv");
|
||||||
|
+ error_printf("PPC KVM module is not loaded. Try modprobe kvm_%s.\n",
|
||||||
|
+ (type == 2) ? "pr" : "hv");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
goto err;
|
||||||
|
@@ -2526,9 +2522,9 @@ static int kvm_init(MachineState *ms)
|
||||||
|
nc->name, nc->num, soft_vcpus_limit);
|
||||||
|
|
||||||
|
if (nc->num > hard_vcpus_limit) {
|
||||||
|
- fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
|
||||||
|
- "the maximum cpus supported by KVM (%d)\n",
|
||||||
|
- nc->name, nc->num, hard_vcpus_limit);
|
||||||
|
+ error_report("Number of %s cpus requested (%d) exceeds "
|
||||||
|
+ "the maximum cpus supported by KVM (%d)",
|
||||||
|
+ nc->name, nc->num, hard_vcpus_limit);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -2542,8 +2538,8 @@ static int kvm_init(MachineState *ms)
|
||||||
|
}
|
||||||
|
if (missing_cap) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
- fprintf(stderr, "kvm does not support %s\n%s",
|
||||||
|
- missing_cap->name, upgrade_note);
|
||||||
|
+ error_report("kvm does not support %s", missing_cap->name);
|
||||||
|
+ error_printf("%s", upgrade_note);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,92 @@
|
|||||||
|
From 6be2f51c147df1ab1dd7c68c6b554512dfc05e6f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Hanna Czenczek <hreitz@redhat.com>
|
||||||
|
Date: Tue, 15 Oct 2024 19:04:37 +0200
|
||||||
|
Subject: [PATCH 1/9] migration: Ensure vmstate_save() sets errp
|
||||||
|
|
||||||
|
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||||
|
RH-MergeRequest: 288: migration: Ensure vmstate_save() sets errp
|
||||||
|
RH-Jira: RHEL-63051
|
||||||
|
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||||
|
RH-Acked-by: German Maglione <None>
|
||||||
|
RH-Commit: [1/1] 4d5a65c294ae83a29db885e42fb3f2ca913c36f0 (hreitz/qemu-kvm-c-9-s)
|
||||||
|
|
||||||
|
migration/savevm.c contains some calls to vmstate_save() that are
|
||||||
|
followed by migrate_set_error() if the integer return value indicates an
|
||||||
|
error. migrate_set_error() requires that the `Error *` object passed to
|
||||||
|
it is set. Therefore, vmstate_save() is assumed to always set *errp on
|
||||||
|
error.
|
||||||
|
|
||||||
|
Right now, that assumption is not met: vmstate_save_state_v() (called
|
||||||
|
internally by vmstate_save()) will not set *errp if
|
||||||
|
vmstate_subsection_save() or vmsd->post_save() fail. Fix that by adding
|
||||||
|
an *errp parameter to vmstate_subsection_save(), and by generating a
|
||||||
|
generic error in case post_save() fails (as is already done for
|
||||||
|
pre_save()).
|
||||||
|
|
||||||
|
Without this patch, qemu will crash after vmstate_subsection_save() or
|
||||||
|
post_save() have failed inside of a vmstate_save() call (unless
|
||||||
|
migrate_set_error() then happens to discard the new error because
|
||||||
|
s->error is already set). This happens e.g. when receiving the state
|
||||||
|
from a virtio-fs back-end (virtiofsd) fails.
|
||||||
|
|
||||||
|
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||||
|
Link: https://lore.kernel.org/r/20241015170437.310358-1-hreitz@redhat.com
|
||||||
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||||
|
(cherry picked from commit 37dfcba1a04989830c706f9cbc00450e5d3a7447)
|
||||||
|
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||||
|
---
|
||||||
|
migration/vmstate.c | 13 ++++++++-----
|
||||||
|
1 file changed, 8 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/migration/vmstate.c b/migration/vmstate.c
|
||||||
|
index ff5d589a6d..fa002b24e8 100644
|
||||||
|
--- a/migration/vmstate.c
|
||||||
|
+++ b/migration/vmstate.c
|
||||||
|
@@ -22,7 +22,8 @@
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
|
static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
- void *opaque, JSONWriter *vmdesc);
|
||||||
|
+ void *opaque, JSONWriter *vmdesc,
|
||||||
|
+ Error **errp);
|
||||||
|
static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
void *opaque);
|
||||||
|
|
||||||
|
@@ -441,12 +442,13 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
json_writer_end_array(vmdesc);
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc);
|
||||||
|
+ ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc, errp);
|
||||||
|
|
||||||
|
if (vmsd->post_save) {
|
||||||
|
int ps_ret = vmsd->post_save(opaque);
|
||||||
|
- if (!ret) {
|
||||||
|
+ if (!ret && ps_ret) {
|
||||||
|
ret = ps_ret;
|
||||||
|
+ error_setg(errp, "post-save failed: %s", vmsd->name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
@@ -518,7 +520,8 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
- void *opaque, JSONWriter *vmdesc)
|
||||||
|
+ void *opaque, JSONWriter *vmdesc,
|
||||||
|
+ Error **errp)
|
||||||
|
{
|
||||||
|
const VMStateDescription * const *sub = vmsd->subsections;
|
||||||
|
bool vmdesc_has_subsections = false;
|
||||||
|
@@ -546,7 +549,7 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
|
||||||
|
qemu_put_byte(f, len);
|
||||||
|
qemu_put_buffer(f, (uint8_t *)vmsdsub->name, len);
|
||||||
|
qemu_put_be32(f, vmsdsub->version_id);
|
||||||
|
- ret = vmstate_save_state(f, vmsdsub, opaque, vmdesc);
|
||||||
|
+ ret = vmstate_save_state_with_err(f, vmsdsub, opaque, vmdesc, errp);
|
||||||
|
if (ret) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
Loading…
Reference in new issue