forked from rpms/qemu-kvm
parent
c3d7983744
commit
9c30296e51
@ -1 +1 @@
|
||||
SOURCES/qemu-8.0.0.tar.xz
|
||||
SOURCES/qemu-9.0.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz
|
||||
6699bb03d6da21159b89668bca01c6c958b95d07 SOURCES/qemu-9.0.0.tar.xz
|
||||
|
@ -0,0 +1,121 @@
|
||||
From 59470e8ab849f22b407f55292e540e16a8cad01a Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 05:34:32 -0400
|
||||
Subject: Add upstream compatibility bits
|
||||
|
||||
Adding a new compat structure for changes introduced during the rebase to QEMU 9.0.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
---
|
||||
|
||||
Rebase notes (9.0.0 rc2):
|
||||
- Add aw-bits setting for aarch compat record (overwritten for 9.4 and older)
|
||||
---
|
||||
hw/arm/virt.c | 3 +++
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 3 ++-
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 22 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 22bc345137..f1af9495c6 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -144,6 +144,8 @@ GlobalProperty arm_rhel_compat[] = {
|
||||
{"virtio-net-pci", "romfile", "" },
|
||||
{"virtio-net-pci-transitional", "romfile", "" },
|
||||
{"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+ /* arm_rhel_compat from arm_virt_compat, added for 9.0.0 rebase */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
|
||||
};
|
||||
const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
|
||||
@@ -3728,6 +3730,7 @@ type_init(rhel_machine_init);
|
||||
|
||||
static void rhel940_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 695cb89a46..0f256d9633 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -302,6 +302,16 @@ const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_5[] = {
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { "migration", "zero-page-detection", "legacy"},
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5);
|
||||
+
|
||||
GlobalProperty hw_compat_rhel_9_4[] = {
|
||||
/* hw_compat_rhel_9_4 from hw_compat_8_0 */
|
||||
{ TYPE_VIRTIO_NET, "host_uso", "off"},
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index a647262d63..6b260682eb 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -1015,7 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
object_class_property_set_description(oc, "x-south-bridge",
|
||||
"Use a different south bridge than PIIX3");
|
||||
|
||||
-
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
+ hw_compat_rhel_9_5_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_4,
|
||||
hw_compat_rhel_9_4_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_3,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index e872dc7e46..2b54944c0f 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -733,6 +733,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
+ hw_compat_rhel_9_5_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index ff753a29e0..9ad54682c6 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1282,6 +1282,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel940_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 46b8725c41..cca62f906b 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -514,6 +514,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_5[];
|
||||
+extern const size_t hw_compat_rhel_9_5_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_4[];
|
||||
extern const size_t hw_compat_rhel_9_4_len;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
|
||||
From: Kfir Manor <kfir@daynix.com>
|
||||
Date: Sun, 22 Jan 2023 17:33:07 +0200
|
||||
Subject: qga/linux: add usb support to guest-get-fsinfo
|
||||
|
||||
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
|
||||
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
|
||||
RH-Bugzilla: 2149191
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: yvugenfi <None>
|
||||
RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191
|
||||
Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/
|
||||
|
||||
Signed-off-by: Kfir Manor <kfir@daynix.com>
|
||||
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
|
||||
|
||||
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
|
||||
Patch-id: 72
|
||||
Patch-present-in-specfile: True
|
||||
---
|
||||
qga/commands-posix.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
|
||||
index 079689d79a..97754930c1 100644
|
||||
--- a/qga/commands-posix.c
|
||||
+++ b/qga/commands-posix.c
|
||||
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
g_str_equal(driver, "sym53c8xx") ||
|
||||
g_str_equal(driver, "virtio-pci") ||
|
||||
g_str_equal(driver, "ahci") ||
|
||||
- g_str_equal(driver, "nvme"))) {
|
||||
+ g_str_equal(driver, "nvme") ||
|
||||
+ g_str_equal(driver, "xhci_hcd") ||
|
||||
+ g_str_equal(driver, "ehci-pci"))) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
|
||||
}
|
||||
} else if (strcmp(driver, "nvme") == 0) {
|
||||
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
|
||||
+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
|
||||
+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
|
||||
} else {
|
||||
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
|
||||
goto cleanup;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,110 +0,0 @@
|
||||
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 15 Feb 2023 02:03:17 -0500
|
||||
Subject: Add RHEL 9.2.0 compat structure
|
||||
|
||||
Adding compatibility bits necessary to keep 9.2.0 machine
|
||||
types same after rebase to 8.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
Rebase notes (8.0.0 rc4):
|
||||
- Added migration.x-preempt-pre-7-2 compat
|
||||
---
|
||||
hw/arm/virt.c | 1 +
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 20 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 1ae1654be5..9be53e9355 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5aa567fad3..0e0120b7f2 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "e1000e", "migrate-timadj", "off" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "virtio-mem", "x-early-migration", "false" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { "migration", "x-preempt-pre-7-2", "true" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
+
|
||||
/*
|
||||
* Mostly the same as hw_compat_7_0
|
||||
*/
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 3e330fd36f..90fb6e2e03 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
/* From pc_i440fx_5_1_machine_options() */
|
||||
pcmc->pci_root_uid = 1;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 98601bb76f..8945b69175 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.2.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
+ hw_compat_rhel_9_2_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index dcd3b966b0..6a0b93c63d 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel920_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 5e7446ee40..5f08bd7550 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_2[];
|
||||
+extern const size_t hw_compat_rhel_9_2_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_1[];
|
||||
extern const size_t hw_compat_rhel_9_1_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,30 @@
|
||||
From ba574acacf679850e337ec2d5e7836b8277cf393 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Ott <sebott@redhat.com>
|
||||
Date: Thu, 18 Apr 2024 15:04:28 +0200
|
||||
Subject: x86: rhel 9.4.0 machine type compat fix
|
||||
|
||||
Fix up the compatibility for 9.4.0. Ensure that pc-q35-rhel9.4.0
|
||||
still uses SMBIOS 3.X by default.
|
||||
|
||||
Signed-off-by: Sebastian Ott <sebott@redhat.com>
|
||||
---
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 2b54944c0f..2f11f9af7d 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -734,6 +734,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
|
||||
+ /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */
|
||||
+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
|
||||
+
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
hw_compat_rhel_9_5_len);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,76 +0,0 @@
|
||||
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 27 Mar 2023 15:14:03 +0200
|
||||
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
|
||||
8.0.0 update
|
||||
|
||||
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 6 ++++++
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 2 ++
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 13 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 8abb1f872e..f216922cee 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
|
||||
};
|
||||
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
|
||||
+GlobalProperty pc_rhel_9_2_compat[] = {
|
||||
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
|
||||
+ { "ICH9-LPC", "noreboot", "true" },
|
||||
+};
|
||||
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
|
||||
+
|
||||
GlobalProperty pc_rhel_9_0_compat[] = {
|
||||
/* pc_rhel_9_0_compat from pc_compat_6_2 */
|
||||
{ "virtio-mem", "unplugged-inaccessible", "off" },
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 90fb6e2e03..fc704d783f 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
|
||||
hw_compat_rhel_9_1_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 8945b69175..e97655616a 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
|
||||
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
|
||||
hw_compat_rhel_9_2_len);
|
||||
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
|
||||
+ pc_rhel_9_2_compat_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 4376f64a47..d218ad1628 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
|
||||
extern GlobalProperty pc_rhel_compat[];
|
||||
extern const size_t pc_rhel_compat_len;
|
||||
|
||||
+extern GlobalProperty pc_rhel_9_2_compat[];
|
||||
+extern const size_t pc_rhel_9_2_compat_len;
|
||||
+
|
||||
extern GlobalProperty pc_rhel_9_0_compat[];
|
||||
extern const size_t pc_rhel_9_0_compat_len;
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,83 +0,0 @@
|
||||
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Mon, 17 Apr 2023 01:24:18 -0400
|
||||
Subject: Disable unwanted new devices
|
||||
|
||||
QEMU 8.0 adds two new devices we do not want to support that can't
|
||||
be disabled using a configure switch.
|
||||
|
||||
1) ide-cf - virtual CompactFlash card
|
||||
|
||||
2) i2c-echo - testing echo device
|
||||
|
||||
Use manual disabling of the devices by changing code (1) and meson configs (2).
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/ide/qdev.c | 9 +++++++++
|
||||
hw/misc/meson.build | 3 ++-
|
||||
2 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
|
||||
index 1b3b4da01d..454bfa5783 100644
|
||||
--- a/hw/ide/qdev.c
|
||||
+++ b/hw/ide/qdev.c
|
||||
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
|
||||
ide_dev_initfn(dev, IDE_CD, errp);
|
||||
}
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static void ide_cf_realize(IDEDevice *dev, Error **errp)
|
||||
{
|
||||
ide_dev_initfn(dev, IDE_CFATA, errp);
|
||||
}
|
||||
+#endif
|
||||
|
||||
#define DEFINE_IDE_DEV_PROPERTIES() \
|
||||
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
|
||||
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
|
||||
.class_init = ide_cd_class_init,
|
||||
};
|
||||
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
static Property ide_cf_properties[] = {
|
||||
DEFINE_IDE_DEV_PROPERTIES(),
|
||||
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
|
||||
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
|
||||
.instance_size = sizeof(IDEDrive),
|
||||
.class_init = ide_cf_class_init,
|
||||
};
|
||||
+#endif
|
||||
|
||||
static void ide_device_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
@@ -396,7 +402,10 @@ static void ide_register_types(void)
|
||||
type_register_static(&ide_bus_info);
|
||||
type_register_static(&ide_hd_info);
|
||||
type_register_static(&ide_cd_info);
|
||||
+/* Disabled for Red Hat Enterprise Linux */
|
||||
+#if 0
|
||||
type_register_static(&ide_cf_info);
|
||||
+#endif
|
||||
type_register_static(&ide_device_type_info);
|
||||
}
|
||||
|
||||
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
|
||||
index a40245ad44..9cc5a61ed7 100644
|
||||
--- a/hw/misc/meson.build
|
||||
+++ b/hw/misc/meson.build
|
||||
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
|
||||
|
||||
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
|
||||
|
||||
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
+# Disabled for Red Hat Enterprise Linux
|
||||
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
|
||||
|
||||
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,139 @@
|
||||
From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 20 Mar 2024 03:39:03 -0500
|
||||
Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via
|
||||
MachineState
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add a new member "guest_memfd" to memory backends. When it's set
|
||||
to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm
|
||||
guest_memfd will be allocated during RAMBlock allocation.
|
||||
|
||||
Memory backend's @guest_memfd is wired with @require_guest_memfd
|
||||
field of MachineState. It avoids looking up the machine in physmem.c.
|
||||
|
||||
MachineState::require_guest_memfd is supposed to be set by any VMs
|
||||
that require KVM guest memfd as private memory, e.g., a TDX VM.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Message-ID: <20240320083945.991426-8-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
backends/hostmem-file.c | 1 +
|
||||
backends/hostmem-memfd.c | 1 +
|
||||
backends/hostmem-ram.c | 1 +
|
||||
backends/hostmem.c | 1 +
|
||||
hw/core/machine.c | 5 +++++
|
||||
include/hw/boards.h | 2 ++
|
||||
include/sysemu/hostmem.h | 1 +
|
||||
7 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
|
||||
index ac3e433cbd..3c69db7946 100644
|
||||
--- a/backends/hostmem-file.c
|
||||
+++ b/backends/hostmem-file.c
|
||||
@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
|
||||
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
|
||||
ram_flags |= RAM_NAMED_FILE;
|
||||
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
|
||||
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
|
||||
index 3923ea9364..745ead0034 100644
|
||||
--- a/backends/hostmem-memfd.c
|
||||
+++ b/backends/hostmem-memfd.c
|
||||
@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
}
|
||||
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
|
||||
index d121249f0f..f7d81af783 100644
|
||||
--- a/backends/hostmem-ram.c
|
||||
+++ b/backends/hostmem-ram.c
|
||||
@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
|
||||
name, backend->size,
|
||||
ram_flags, errp);
|
||||
diff --git a/backends/hostmem.c b/backends/hostmem.c
|
||||
index 81a72ce40b..eb9682b4a8 100644
|
||||
--- a/backends/hostmem.c
|
||||
+++ b/backends/hostmem.c
|
||||
@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj)
|
||||
/* TODO: convert access to globals to compat properties */
|
||||
backend->merge = machine_mem_merge(machine);
|
||||
backend->dump = machine_dump_guest_core(machine);
|
||||
+ backend->guest_memfd = machine_require_guest_memfd(machine);
|
||||
backend->reserve = true;
|
||||
backend->prealloc_threads = machine->smp.cpus;
|
||||
}
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 92609aae27..07b994e136 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine)
|
||||
return machine->mem_merge;
|
||||
}
|
||||
|
||||
+bool machine_require_guest_memfd(MachineState *machine)
|
||||
+{
|
||||
+ return machine->require_guest_memfd;
|
||||
+}
|
||||
+
|
||||
static char *cpu_slot_to_string(const CPUArchId *cpu)
|
||||
{
|
||||
GString *s = g_string_new(NULL);
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index cca62f906b..815a1c4b26 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine);
|
||||
int machine_phandle_start(MachineState *machine);
|
||||
bool machine_dump_guest_core(MachineState *machine);
|
||||
bool machine_mem_merge(MachineState *machine);
|
||||
+bool machine_require_guest_memfd(MachineState *machine);
|
||||
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
|
||||
void machine_set_cpu_numa_node(MachineState *machine,
|
||||
const CpuInstanceProperties *props,
|
||||
@@ -372,6 +373,7 @@ struct MachineState {
|
||||
char *dt_compatible;
|
||||
bool dump_guest_core;
|
||||
bool mem_merge;
|
||||
+ bool require_guest_memfd;
|
||||
bool usb;
|
||||
bool usb_disabled;
|
||||
char *firmware;
|
||||
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
|
||||
index 0e411aaa29..04b884bf42 100644
|
||||
--- a/include/sysemu/hostmem.h
|
||||
+++ b/include/sysemu/hostmem.h
|
||||
@@ -74,6 +74,7 @@ struct HostMemoryBackend {
|
||||
uint64_t size;
|
||||
bool merge, dump, use_canonical_path;
|
||||
bool prealloc, is_mapped, share, reserve;
|
||||
+ bool guest_memfd;
|
||||
uint32_t prealloc_threads;
|
||||
ThreadContext *prealloc_context;
|
||||
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,203 @@
|
||||
From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:41:33 -0400
|
||||
Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Board reset requires writing a fresh CPU state. As far as KVM is
|
||||
concerned, the only thing that blocks reset is that CPU state is
|
||||
encrypted; therefore, kvm_cpus_are_resettable() can simply check
|
||||
if that is the case.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-accel-ops.c | 2 +-
|
||||
accel/kvm/kvm-all.c | 5 -----
|
||||
include/sysemu/kvm.h | 10 ----------
|
||||
target/arm/kvm.c | 5 -----
|
||||
target/i386/kvm/kvm.c | 5 -----
|
||||
target/loongarch/kvm/kvm.c | 5 -----
|
||||
target/mips/kvm.c | 5 -----
|
||||
target/ppc/kvm.c | 5 -----
|
||||
target/riscv/kvm/kvm-cpu.c | 5 -----
|
||||
target/s390x/kvm/kvm.c | 5 -----
|
||||
10 files changed, 1 insertion(+), 51 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
|
||||
index b3c946dc4b..74e3c5785b 100644
|
||||
--- a/accel/kvm/kvm-accel-ops.c
|
||||
+++ b/accel/kvm/kvm-accel-ops.c
|
||||
@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu)
|
||||
|
||||
static bool kvm_cpus_are_resettable(void)
|
||||
{
|
||||
- return !kvm_enabled() || kvm_cpu_check_are_resettable();
|
||||
+ return !kvm_enabled() || !kvm_state->guest_state_protected;
|
||||
}
|
||||
|
||||
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index ec0f6df7c5..b51e09a583 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void)
|
||||
s->coalesced_flush_in_progress = false;
|
||||
}
|
||||
|
||||
-bool kvm_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return kvm_arch_cpu_check_are_resettable();
|
||||
-}
|
||||
-
|
||||
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
{
|
||||
if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index 302e8f6f1e..54f4d83a37 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
|
||||
/* Notify resamplefd for EOI of specific interrupts. */
|
||||
void kvm_resample_fd_notify(int gsi);
|
||||
|
||||
-/**
|
||||
- * kvm_cpu_check_are_resettable - return whether CPUs can be reset
|
||||
- *
|
||||
- * Returns: true: CPUs are resettable
|
||||
- * false: CPUs are not resettable
|
||||
- */
|
||||
-bool kvm_cpu_check_are_resettable(void);
|
||||
-
|
||||
-bool kvm_arch_cpu_check_are_resettable(void);
|
||||
-
|
||||
bool kvm_dirty_ring_enabled(void);
|
||||
|
||||
uint32_t kvm_dirty_ring_size(void);
|
||||
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
|
||||
index ab85d628a8..21ebbf3b8f 100644
|
||||
--- a/target/arm/kvm.c
|
||||
+++ b/target/arm/kvm.c
|
||||
@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
|
||||
return (data - 32) & 0xffff;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index e271652620..a12207a8ee 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void)
|
||||
return has_msr_umwait;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return !sev_es_enabled();
|
||||
-}
|
||||
-
|
||||
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
|
||||
|
||||
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
|
||||
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
|
||||
index d630cc39cb..8224d94333 100644
|
||||
--- a/target/loongarch/kvm/kvm.c
|
||||
+++ b/target/loongarch/kvm/kvm.c
|
||||
@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
|
||||
return true;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
{
|
||||
int ret = 0;
|
||||
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
|
||||
index 6c52e59f55..a631ab544f 100644
|
||||
--- a/target/mips/kvm.c
|
||||
+++ b/target/mips/kvm.c
|
||||
@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine)
|
||||
return -1;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
void kvm_arch_accel_class_init(ObjectClass *oc)
|
||||
{
|
||||
}
|
||||
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
|
||||
index 59f640cf7b..9d9d9f0d79 100644
|
||||
--- a/target/ppc/kvm.c
|
||||
+++ b/target/ppc/kvm.c
|
||||
@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
|
||||
}
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
void kvm_arch_accel_class_init(ObjectClass *oc)
|
||||
{
|
||||
}
|
||||
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
|
||||
index 6a6c6cae80..49d2f3ad58 100644
|
||||
--- a/target/riscv/kvm/kvm-cpu.c
|
||||
+++ b/target/riscv/kvm/kvm-cpu.c
|
||||
@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
|
||||
}
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
static int aia_mode;
|
||||
|
||||
static const char *kvm_aia_mode_str(uint64_t mode)
|
||||
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
|
||||
index 55fb4855b1..4db59658e1 100644
|
||||
--- a/target/s390x/kvm/kvm.c
|
||||
+++ b/target/s390x/kvm/kvm.c
|
||||
@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
|
||||
kvm_s390_vcpu_interrupt(cpu, &irq);
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
int kvm_s390_get_zpci_op(void)
|
||||
{
|
||||
return cap_zpci_op;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:41:10 -0400
|
||||
Subject: [PATCH 019/100] KVM: track whether guest state is encrypted
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the
|
||||
guest state is encrypted, in which case they do nothing. For the new
|
||||
API using VM types, instead, the ioctls will fail which is a safer and
|
||||
more robust approach.
|
||||
|
||||
The new API will be the only one available for SEV-SNP and TDX, but it
|
||||
is also usable for SEV and SEV-ES. In preparation for that, require
|
||||
architecture-specific KVM code to communicate the point at which guest
|
||||
state is protected (which must be after kvm_cpu_synchronize_post_init(),
|
||||
though that might change in the future in order to support migration).
|
||||
From that point, skip reading registers so that cpu->vcpu_dirty is
|
||||
never true: if it ever becomes true, kvm_arch_put_registers() will
|
||||
fail miserably.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 17 ++++++++++++++---
|
||||
include/sysemu/kvm.h | 2 ++
|
||||
include/sysemu/kvm_int.h | 1 +
|
||||
target/i386/sev.c | 1 +
|
||||
4 files changed, 18 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 931f74256e..ec0f6df7c5 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void)
|
||||
|
||||
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
{
|
||||
- if (!cpu->vcpu_dirty) {
|
||||
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
int ret = kvm_arch_get_registers(cpu);
|
||||
if (ret) {
|
||||
error_report("Failed to get registers: %s", strerror(-ret));
|
||||
@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
|
||||
void kvm_cpu_synchronize_state(CPUState *cpu)
|
||||
{
|
||||
- if (!cpu->vcpu_dirty) {
|
||||
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
|
||||
}
|
||||
}
|
||||
@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
|
||||
|
||||
void kvm_cpu_synchronize_post_init(CPUState *cpu)
|
||||
{
|
||||
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
|
||||
+ if (!kvm_state->guest_state_protected) {
|
||||
+ /*
|
||||
+ * This runs before the machine_init_done notifiers, and is the last
|
||||
+ * opportunity to synchronize the state of confidential guests.
|
||||
+ */
|
||||
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
|
||||
@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
|
||||
query_stats_schema_vcpu(first_cpu, &stats_args);
|
||||
}
|
||||
}
|
||||
+
|
||||
+void kvm_mark_guest_state_protected(void)
|
||||
+{
|
||||
+ kvm_state->guest_state_protected = true;
|
||||
+}
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index fad9a7e8ff..302e8f6f1e 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void);
|
||||
|
||||
uint32_t kvm_dirty_ring_size(void);
|
||||
|
||||
+void kvm_mark_guest_state_protected(void);
|
||||
+
|
||||
/**
|
||||
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
|
||||
* reported for the VM.
|
||||
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||
index 882e37e12c..3496be7997 100644
|
||||
--- a/include/sysemu/kvm_int.h
|
||||
+++ b/include/sysemu/kvm_int.h
|
||||
@@ -87,6 +87,7 @@ struct KVMState
|
||||
bool kernel_irqchip_required;
|
||||
OnOffAuto kernel_irqchip_split;
|
||||
bool sync_mmu;
|
||||
+ bool guest_state_protected;
|
||||
uint64_t manual_dirty_log_protect;
|
||||
/* The man page (and posix) say ioctl numbers are signed int, but
|
||||
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index b8f79d34d1..c49a8fd55e 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
|
||||
if (ret) {
|
||||
exit(1);
|
||||
}
|
||||
+ kvm_mark_guest_state_protected();
|
||||
}
|
||||
|
||||
/* query the measurement blob length */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,329 @@
|
||||
From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 20 Mar 2024 03:39:02 -0500
|
||||
Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add KVM guest_memfd support to RAMBlock so both normal hva based memory
|
||||
and kvm guest memfd based private memory can be associated in one RAMBlock.
|
||||
|
||||
Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to
|
||||
create private guest_memfd during RAMBlock setup.
|
||||
|
||||
Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd
|
||||
is more flexible and extensible than simply relying on the VM type because
|
||||
in the future we may have the case that not all the memory of a VM need
|
||||
guest memfd. As a benefit, it also avoids getting MachineState in memory
|
||||
subsystem.
|
||||
|
||||
Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of
|
||||
confidential guests, such as TDX VM. How and when to set it for memory
|
||||
backends will be implemented in the following patches.
|
||||
|
||||
Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has
|
||||
KVM guest_memfd allocated.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Message-ID: <20240320083945.991426-7-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++
|
||||
accel/stubs/kvm-stub.c | 5 +++++
|
||||
include/exec/memory.h | 20 +++++++++++++++++---
|
||||
include/exec/ram_addr.h | 2 +-
|
||||
include/exec/ramblock.h | 1 +
|
||||
include/sysemu/kvm.h | 2 ++
|
||||
system/memory.c | 5 +++++
|
||||
system/physmem.c | 34 +++++++++++++++++++++++++++++++---
|
||||
8 files changed, 90 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 272e945f52..a7b9a127dd 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug;
|
||||
static int kvm_sstep_flags;
|
||||
static bool kvm_immediate_exit;
|
||||
static uint64_t kvm_supported_memory_attributes;
|
||||
+static bool kvm_guest_memfd_supported;
|
||||
static hwaddr kvm_max_slot_size = ~0;
|
||||
|
||||
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
||||
@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms)
|
||||
}
|
||||
|
||||
kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
|
||||
+ kvm_guest_memfd_supported =
|
||||
+ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
|
||||
+ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
|
||||
+ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
||||
+
|
||||
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
|
||||
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
||||
|
||||
@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void)
|
||||
{
|
||||
kvm_state->guest_state_protected = true;
|
||||
}
|
||||
+
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
+{
|
||||
+ int fd;
|
||||
+ struct kvm_create_guest_memfd guest_memfd = {
|
||||
+ .size = size,
|
||||
+ .flags = flags,
|
||||
+ };
|
||||
+
|
||||
+ if (!kvm_guest_memfd_supported) {
|
||||
+ error_setg(errp, "KVM does not support guest_memfd");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
|
||||
+ if (fd < 0) {
|
||||
+ error_setg_errno(errp, errno, "Error creating KVM guest_memfd");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return fd;
|
||||
+}
|
||||
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
|
||||
index ca38172884..8e0eb22e61 100644
|
||||
--- a/accel/stubs/kvm-stub.c
|
||||
+++ b/accel/stubs/kvm-stub.c
|
||||
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
+
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
+{
|
||||
+ return -ENOSYS;
|
||||
+}
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 8626a355b3..679a847685 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM FD is opened read-only */
|
||||
#define RAM_READONLY_FD (1 << 11)
|
||||
|
||||
+/* RAM can be private that has kvm guest memfd backend */
|
||||
+#define RAM_GUEST_MEMFD (1 << 12)
|
||||
+
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
|
||||
* @name: Region name, becomes part of RAMBlock name used in migration stream
|
||||
* must be unique within any device
|
||||
* @size: size of the region.
|
||||
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
|
||||
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
|
||||
+ * RAM_GUEST_MEMFD.
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
*
|
||||
* Note that this function does not do anything to cause the data in the
|
||||
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
|
||||
* (getpagesize()) will be used.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @path: the path in which to allocate the RAM.
|
||||
* @offset: offset within the file referenced by path
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
* @size: size of the region.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @fd: the fd to mmap.
|
||||
* @offset: offset within the file referenced by fd
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
|
||||
*/
|
||||
bool memory_region_is_protected(MemoryRegion *mr);
|
||||
|
||||
+/**
|
||||
+ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
|
||||
+ * associated
|
||||
+ *
|
||||
+ * Returns %true if a memory region's ram_block has valid guest_memfd assigned.
|
||||
+ *
|
||||
+ * @mr: the memory region being queried
|
||||
+ */
|
||||
+bool memory_region_has_guest_memfd(MemoryRegion *mr);
|
||||
+
|
||||
/**
|
||||
* memory_region_get_iommu: check whether a memory region is an iommu
|
||||
*
|
||||
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
|
||||
index de45ba7bc9..07c8f86375 100644
|
||||
--- a/include/exec/ram_addr.h
|
||||
+++ b/include/exec/ram_addr.h
|
||||
@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void);
|
||||
* @mr: the memory region where the ram block is
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @mem_path or @fd: specify the backing file or device
|
||||
* @offset: Offset into target file
|
||||
* @errp: pointer to Error*, to store an error if it happens
|
||||
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
|
||||
index 848915ea5b..459c8917de 100644
|
||||
--- a/include/exec/ramblock.h
|
||||
+++ b/include/exec/ramblock.h
|
||||
@@ -41,6 +41,7 @@ struct RAMBlock {
|
||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
||||
int fd;
|
||||
uint64_t fd_offset;
|
||||
+ int guest_memfd;
|
||||
size_t page_size;
|
||||
/* dirty bitmap used during migration */
|
||||
unsigned long *bmap;
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index f114ff6986..9e4ab7ae89 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void);
|
||||
*/
|
||||
bool kvm_hwpoisoned_mem(void);
|
||||
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
|
||||
+
|
||||
int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
|
||||
int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
|
||||
|
||||
diff --git a/system/memory.c b/system/memory.c
|
||||
index a229a79988..c756950c0c 100644
|
||||
--- a/system/memory.c
|
||||
+++ b/system/memory.c
|
||||
@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
|
||||
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
|
||||
}
|
||||
|
||||
+bool memory_region_has_guest_memfd(MemoryRegion *mr)
|
||||
+{
|
||||
+ return mr->ram_block && mr->ram_block->guest_memfd >= 0;
|
||||
+}
|
||||
+
|
||||
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
||||
{
|
||||
uint8_t mask = mr->dirty_log_mask;
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index a4fe3d2bf8..f5dfa20e57 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
const bool shared = qemu_ram_is_shared(new_block);
|
||||
RAMBlock *block;
|
||||
RAMBlock *last_block = NULL;
|
||||
+ bool free_on_error = false;
|
||||
ram_addr_t old_ram_size, new_ram_size;
|
||||
Error *err = NULL;
|
||||
|
||||
@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
return;
|
||||
}
|
||||
memory_try_enable_merging(new_block->host, new_block->max_length);
|
||||
+ free_on_error = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (new_block->flags & RAM_GUEST_MEMFD) {
|
||||
+ assert(kvm_enabled());
|
||||
+ assert(new_block->guest_memfd < 0);
|
||||
+
|
||||
+ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
+ 0, errp);
|
||||
+ if (new_block->guest_memfd < 0) {
|
||||
+ qemu_mutex_unlock_ramlist();
|
||||
+ goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
ram_block_notify_add(new_block->host, new_block->used_length,
|
||||
new_block->max_length);
|
||||
}
|
||||
+ return;
|
||||
+
|
||||
+out_free:
|
||||
+ if (free_on_error) {
|
||||
+ qemu_anon_ram_free(new_block->host, new_block->max_length);
|
||||
+ new_block->host = NULL;
|
||||
+ }
|
||||
}
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
/* Just support these ram flags by now. */
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
||||
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
||||
- RAM_READONLY_FD)) == 0);
|
||||
+ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
||||
|
||||
if (xen_enabled()) {
|
||||
error_setg(errp, "-mem-path not supported with Xen");
|
||||
@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
new_block->used_length = size;
|
||||
new_block->max_length = size;
|
||||
new_block->flags = ram_flags;
|
||||
+ new_block->guest_memfd = -1;
|
||||
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
||||
errp);
|
||||
if (!new_block->host) {
|
||||
@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
int align;
|
||||
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
|
||||
- RAM_NORESERVE)) == 0);
|
||||
+ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
||||
assert(!host ^ (ram_flags & RAM_PREALLOC));
|
||||
|
||||
align = qemu_real_host_page_size();
|
||||
@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
new_block->max_length = max_size;
|
||||
assert(max_size >= size);
|
||||
new_block->fd = -1;
|
||||
+ new_block->guest_memfd = -1;
|
||||
new_block->page_size = qemu_real_host_page_size();
|
||||
new_block->host = host;
|
||||
new_block->flags = ram_flags;
|
||||
@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
||||
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
|
||||
MemoryRegion *mr, Error **errp)
|
||||
{
|
||||
- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
|
||||
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
||||
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
|
||||
}
|
||||
|
||||
@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block)
|
||||
} else {
|
||||
qemu_anon_ram_free(block->host, block->max_length);
|
||||
}
|
||||
+
|
||||
+ if (block->guest_memfd >= 0) {
|
||||
+ close(block->guest_memfd);
|
||||
+ }
|
||||
+
|
||||
g_free(block);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,82 @@
|
||||
From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 17:45:29 +0100
|
||||
Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated
|
||||
discard
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Some subsystems like VFIO might disable ram block discard, but guest_memfd
|
||||
uses discard operations to implement conversions between private and
|
||||
shared memory. Because of this, sequences like the following can result
|
||||
in stale IOMMU mappings:
|
||||
|
||||
1. allocate shared page
|
||||
2. convert page shared->private
|
||||
3. discard shared page
|
||||
4. convert page private->shared
|
||||
5. allocate shared page
|
||||
6. issue DMA operations against that shared page
|
||||
|
||||
This is not a use-after-free, because after step 3 VFIO is still pinning
|
||||
the page. However, DMA operations in step 6 will hit the old mapping
|
||||
that was allocated in step 1.
|
||||
|
||||
Address this by taking ram_block_discard_is_enabled() into account when
|
||||
deciding whether or not to discard pages.
|
||||
|
||||
Since kvm_convert_memory()/guest_memfd doesn't implement a
|
||||
RamDiscardManager handler to convey and replay discard operations,
|
||||
this is a case of uncoordinated discard, which is blocked/released
|
||||
by ram_block_discard_require(). Interestingly, this function had
|
||||
no use so far.
|
||||
|
||||
Alternative approaches would be to block discard of shared pages, but
|
||||
this would cause guests to consume twice the memory if they use VFIO;
|
||||
or to implement a RamDiscardManager and only block uncoordinated
|
||||
discard, i.e. use ram_block_coordinated_discard_require().
|
||||
|
||||
[Commit message mostly by Michael Roth <michael.roth@amd.com>]
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
system/physmem.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index f5dfa20e57..5ebcf5be11 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
assert(kvm_enabled());
|
||||
assert(new_block->guest_memfd < 0);
|
||||
|
||||
+ if (ram_block_discard_require(true) < 0) {
|
||||
+ error_setg_errno(errp, errno,
|
||||
+ "cannot set up private guest memory: discard currently blocked");
|
||||
+ error_append_hint(errp, "Are you using assigned devices?\n");
|
||||
+ goto out_free;
|
||||
+ }
|
||||
+
|
||||
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
0, errp);
|
||||
if (new_block->guest_memfd < 0) {
|
||||
@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block)
|
||||
|
||||
if (block->guest_memfd >= 0) {
|
||||
close(block->guest_memfd);
|
||||
+ ram_block_discard_require(false);
|
||||
}
|
||||
|
||||
g_free(block);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,67 @@
|
||||
From d4e6f7105b00ba2536d5d733b7c03116f28ce116 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:06:21 -0400
|
||||
Subject: [PATCH 2/5] Revert "monitor: use aio_co_reschedule_self()"
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()"
|
||||
RH-Jira: RHEL-34618 RHEL-38697
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/2] b6a2ebd4a69dbcd2bd56c61e7c747f8f8f42337e (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Commit 1f25c172f837 ("monitor: use aio_co_reschedule_self()") was a code
|
||||
cleanup that uses aio_co_reschedule_self() instead of open coding
|
||||
coroutine rescheduling.
|
||||
|
||||
Bug RHEL-34618 was reported and Kevin Wolf <kwolf@redhat.com> identified
|
||||
the root cause. I missed that aio_co_reschedule_self() ->
|
||||
qemu_get_current_aio_context() only knows about
|
||||
qemu_aio_context/IOThread AioContexts and not about iohandler_ctx. It
|
||||
does not function correctly when going back from the iohandler_ctx to
|
||||
qemu_aio_context.
|
||||
|
||||
Go back to open coding the AioContext transitions to avoid this bug.
|
||||
|
||||
This reverts commit 1f25c172f83704e350c0829438d832384084a74d.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-34618
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240506190622.56095-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 719c6819ed9a9838520fa732f9861918dc693bda)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
qapi/qmp-dispatch.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
|
||||
index f3488afeef..176b549473 100644
|
||||
--- a/qapi/qmp-dispatch.c
|
||||
+++ b/qapi/qmp-dispatch.c
|
||||
@@ -212,7 +212,8 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
|
||||
* executing the command handler so that it can make progress if it
|
||||
* involves an AIO_WAIT_WHILE().
|
||||
*/
|
||||
- aio_co_reschedule_self(qemu_get_aio_context());
|
||||
+ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
|
||||
+ qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
monitor_set_cur(qemu_coroutine_self(), cur_mon);
|
||||
@@ -226,7 +227,9 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
|
||||
* Move back to iohandler_ctx so that nested event loops for
|
||||
* qemu_aio_context don't start new monitor commands.
|
||||
*/
|
||||
- aio_co_reschedule_self(iohandler_get_aio_context());
|
||||
+ aio_co_schedule(iohandler_get_aio_context(),
|
||||
+ qemu_coroutine_self());
|
||||
+ qemu_coroutine_yield();
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,38 @@
|
||||
From bcbc897cb19b3a6523de611f48f6bac6cea16c97 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Ott <sebott@redhat.com>
|
||||
Date: Thu, 2 May 2024 13:17:03 +0200
|
||||
Subject: [PATCH 2/2] Revert "x86: rhel 9.4.0 machine type compat fix"
|
||||
|
||||
RH-Author: Sebastian Ott <sebott@redhat.com>
|
||||
RH-MergeRequest: 237: Revert "x86: rhel 9.4.0 machine type compat fix"
|
||||
RH-Jira: RHEL-30362
|
||||
RH-Acked-by: Ani Sinha <anisinha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] 858ec153e65e96c39ca4db17ed93fd58c77dc2eb (seott1/cos-qemu-kvm)
|
||||
|
||||
This reverts commit c46e44f0f4e861fe412ce679b0b0204881c1c2f5.
|
||||
|
||||
pc-q35-rhel9.4.0 and newer should stay with SMBIOS_ENTRY_POINT_TYPE_AUTO.
|
||||
|
||||
Signed-off-by: Sebastian Ott <sebott@redhat.com>
|
||||
---
|
||||
hw/i386/pc_q35.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 2f11f9af7d..2b54944c0f 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -734,9 +734,6 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
|
||||
- /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */
|
||||
- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
|
||||
-
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
hw_compat_rhel_9_5_len);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,84 +0,0 @@
|
||||
From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Tue, 18 Apr 2023 11:04:49 +0200
|
||||
Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests
|
||||
RH-Bugzilla: 2087047
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Acked-by: Julia Suvorova <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam)
|
||||
|
||||
with Q35 using ACPI PCI hotplug by default, user's request to unplug
|
||||
device is ignored when it's issued before guest OS has been booted.
|
||||
And any additional attempt to request device hot-unplug afterwards
|
||||
results in following error:
|
||||
|
||||
"Device XYZ is already in the process of unplug"
|
||||
|
||||
arguably it can be considered as a regression introduced by [2],
|
||||
before which it was possible to issue unplug request multiple
|
||||
times.
|
||||
|
||||
Accept new unplug requests after timeout (1ms). This brings ACPI PCI
|
||||
hotplug on par with native PCIe unplug behavior [1] and allows user
|
||||
to repeat unplug requests at proper times.
|
||||
Set expire timeout to arbitrary 1msec so user won't be able to
|
||||
flood guest with SCI interrupts by calling device_del in tight loop.
|
||||
|
||||
PS:
|
||||
ACPI spec doesn't mandate what OSPM can do with GPEx.status
|
||||
bits set before it's booted => it's impl. depended.
|
||||
Status bits may be retained (I tested with one Windows version)
|
||||
or cleared (Linux since 2.6 kernel times) during guest's ACPI
|
||||
subsystem initialization.
|
||||
Clearing status bits (though not wrong per se) hides the unplug
|
||||
event from guest, and it's up to the user to repeat device_del later
|
||||
when guest is able to handle unplug requests.
|
||||
|
||||
1) 18416c62e3 ("pcie: expire pending delete")
|
||||
2)
|
||||
Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del")
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
CC: mst@redhat.com
|
||||
CC: anisinha@redhat.com
|
||||
CC: jusual@redhat.com
|
||||
CC: kraxel@redhat.com
|
||||
Message-Id: <20230418090449.2155757-1-imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Ani Sinha <anisinha@redhat.com>
|
||||
(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d)
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/acpi/pcihp.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
|
||||
index dcfb779a7a..cdd6f775a1 100644
|
||||
--- a/hw/acpi/pcihp.c
|
||||
+++ b/hw/acpi/pcihp.c
|
||||
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
* acpi_pcihp_eject_slot() when the operation is completed.
|
||||
*/
|
||||
pdev->qdev.pending_deleted_event = true;
|
||||
+ /* if unplug was requested before OSPM is initialized,
|
||||
+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively
|
||||
+ * hides unplug event. And than followup qmp_device_del() calls remain
|
||||
+ * blocked by above flag permanently.
|
||||
+ * Unblock qmp_device_del() by setting expire limit, so user can
|
||||
+ * repeat unplug request later when OSPM has been booted.
|
||||
+ */
|
||||
+ pdev->qdev.pending_deleted_expires_ms =
|
||||
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
|
||||
+
|
||||
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
|
||||
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
@ -0,0 +1,64 @@
|
||||
From 0e3934e89ad1dda21681f64ff38da69b07d1b531 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:06:22 -0400
|
||||
Subject: [PATCH 3/5] aio: warn about iohandler_ctx special casing
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()"
|
||||
RH-Jira: RHEL-34618 RHEL-38697
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/2] cc316d70b2c187ee0412d6560ca1a03e381a69c1 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The main loop has two AioContexts: qemu_aio_context and iohandler_ctx.
|
||||
The main loop runs them both, but nested aio_poll() calls on
|
||||
qemu_aio_context exclude iohandler_ctx.
|
||||
|
||||
Which one should qemu_get_current_aio_context() return when called from
|
||||
the main loop? Document that it's always qemu_aio_context.
|
||||
|
||||
This has subtle effects on functions that use
|
||||
qemu_get_current_aio_context(). For example, aio_co_reschedule_self()
|
||||
does not work when moving from iohandler_ctx to qemu_aio_context because
|
||||
qemu_get_current_aio_context() does not differentiate these two
|
||||
AioContexts.
|
||||
|
||||
Document this in order to reduce the chance of future bugs.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240506190622.56095-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e669e800fc9ef8806af5c5578249ab758a4f8a5a)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio.h | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 8378553eb9..4ee81936ed 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -629,6 +629,9 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co);
|
||||
*
|
||||
* Move the currently running coroutine to new_ctx. If the coroutine is already
|
||||
* running in new_ctx, do nothing.
|
||||
+ *
|
||||
+ * Note that this function cannot reschedule from iohandler_ctx to
|
||||
+ * qemu_aio_context.
|
||||
*/
|
||||
void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
|
||||
|
||||
@@ -661,6 +664,9 @@ void aio_co_enter(AioContext *ctx, Coroutine *co);
|
||||
* If called from an IOThread this will be the IOThread's AioContext. If
|
||||
* called from the main thread or with the "big QEMU lock" taken it
|
||||
* will be the main loop AioContext.
|
||||
+ *
|
||||
+ * Note that the return value is never the main loop's iohandler_ctx and the
|
||||
+ * return value is the main loop AioContext instead.
|
||||
*/
|
||||
AioContext *qemu_get_current_aio_context(void);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 50795ee051a342c681a9b45671c552fbd6274db8
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:13 2023 -0400
|
||||
|
||||
apic: disable reentrancy detection for apic-msi
|
||||
|
||||
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
|
||||
as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/apic.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
|
||||
index 20b5a94073..ac3d47d231 100644
|
||||
--- a/hw/intc/apic.c
|
||||
+++ b/hw/intc/apic.c
|
||||
@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
|
||||
APIC_SPACE_SIZE);
|
||||
|
||||
+ /*
|
||||
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
|
||||
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
|
||||
+ * safe.
|
||||
+ */
|
||||
+ s->io_memory.disable_reentrancy_guard = true;
|
||||
+
|
||||
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
|
||||
local_apics[s->id] = s;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,231 +0,0 @@
|
||||
From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:07 2023 -0400
|
||||
|
||||
async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
|
||||
when creating new BHes. Then, the async API will toggle the guard
|
||||
before/after calling the BH call-back. This prevents bh->mmio reentrancy
|
||||
issues.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
|
||||
[thuth: Fix "line over 90 characters" checkpatch.pl error]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 7 +++++++
|
||||
include/block/aio.h | 18 ++++++++++++++++--
|
||||
include/qemu/main-loop.h | 7 +++++--
|
||||
tests/unit/ptimer-test-stubs.c | 3 ++-
|
||||
util/async.c | 18 +++++++++++++++++-
|
||||
util/main-loop.c | 6 ++++--
|
||||
util/trace-events | 1 +
|
||||
7 files changed, 52 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index 343120f2ef..a3e949f6b3 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
|
||||
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
|
||||
* LEGACY timer_new_ms() - create a timer
|
||||
* LEGACY qemu_bh_new() - create a BH
|
||||
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* LEGACY qemu_aio_wait() - run an event loop iteration
|
||||
|
||||
Since they implicitly work on the main loop they cannot be used in code that
|
||||
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
|
||||
* aio_set_event_notifier() - monitor an event notifier
|
||||
* aio_timer_new() - create a timer
|
||||
* aio_bh_new() - create a BH
|
||||
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* aio_poll() - run an event loop iteration
|
||||
|
||||
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
|
||||
+argument, which is used to check for and prevent re-entrancy problems. For
|
||||
+BHs associated with devices, the reentrancy-guard is contained in the
|
||||
+corresponding DeviceState and named "mem_reentrancy_guard".
|
||||
+
|
||||
The AioContext can be obtained from the IOThread using
|
||||
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
|
||||
Code that takes an AioContext argument works both in IOThreads or the main
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 543717f294..db6f23c619 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -23,6 +23,8 @@
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "block/graph-lock.h"
|
||||
+#include "hw/qdev-core.h"
|
||||
+
|
||||
|
||||
typedef struct BlockAIOCB BlockAIOCB;
|
||||
typedef void BlockCompletionFunc(void *opaque, int ret);
|
||||
@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*
|
||||
* @name: A human-readable identifier for debugging purposes.
|
||||
+ * @reentrancy_guard: A guard set when entering a cb to prevent
|
||||
+ * device-reentrancy issues
|
||||
*/
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name);
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure
|
||||
@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* string.
|
||||
*/
|
||||
#define aio_bh_new(ctx, cb, opaque) \
|
||||
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
|
||||
+
|
||||
+/**
|
||||
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
|
||||
+ * reentrancy_guard
|
||||
+ *
|
||||
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
|
||||
+ * string.
|
||||
+ */
|
||||
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
|
||||
index b3e54e00bc..68e70e61aa 100644
|
||||
--- a/include/qemu/main-loop.h
|
||||
+++ b/include/qemu/main-loop.h
|
||||
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
|
||||
|
||||
/* internal interfaces */
|
||||
|
||||
+#define qemu_bh_new_guarded(cb, opaque, guard) \
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
|
||||
#define qemu_bh_new(cb, opaque) \
|
||||
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
|
||||
enum {
|
||||
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
|
||||
index f2bfcede93..8c9407c560 100644
|
||||
--- a/tests/unit/ptimer-test-stubs.c
|
||||
+++ b/tests/unit/ptimer-test-stubs.c
|
||||
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
|
||||
return deadline;
|
||||
}
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh = g_new(QEMUBH, 1);
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 21016a1ac7..a9b528c370 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -65,6 +65,7 @@ struct QEMUBH {
|
||||
void *opaque;
|
||||
QSLIST_ENTRY(QEMUBH) next;
|
||||
unsigned flags;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
};
|
||||
|
||||
/* Called concurrently from any thread */
|
||||
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
|
||||
}
|
||||
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name)
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_new(QEMUBH, 1);
|
||||
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
.name = name,
|
||||
+ .reentrancy_guard = reentrancy_guard,
|
||||
};
|
||||
return bh;
|
||||
}
|
||||
|
||||
void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
+ bool last_engaged_in_io = false;
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
+ if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ trace_reentrant_aio(bh->ctx, bh->name);
|
||||
+ }
|
||||
+ bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
bh->cb(bh->opaque);
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
|
||||
diff --git a/util/main-loop.c b/util/main-loop.c
|
||||
index e180c85145..7022f02ef8 100644
|
||||
--- a/util/main-loop.c
|
||||
+++ b/util/main-loop.c
|
||||
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
|
||||
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
|
||||
+ reentrancy_guard);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/util/trace-events b/util/trace-events
|
||||
index 16f78d8fe5..3f7e766683 100644
|
||||
--- a/util/trace-events
|
||||
+++ b/util/trace-events
|
||||
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
|
||||
# async.c
|
||||
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
|
||||
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
|
||||
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Mon May 1 10:19:56 2023 -0400
|
||||
|
||||
async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
|
||||
Fix that by keeping a local copy of the re-entrancy guard pointer.
|
||||
|
||||
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
|
||||
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
util/async.c | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index a9b528c370..cd1a1815f9 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
bool last_engaged_in_io = false;
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
- if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ /* Make a copy of the guard-pointer as cb may free the bh */
|
||||
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
|
||||
+ if (reentrancy_guard) {
|
||||
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
|
||||
+ if (reentrancy_guard->engaged_in_io) {
|
||||
trace_reentrant_aio(bh->ctx, bh->name);
|
||||
}
|
||||
- bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ reentrancy_guard->engaged_in_io = true;
|
||||
}
|
||||
|
||||
bh->cb(bh->opaque);
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ if (reentrancy_guard) {
|
||||
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,57 +0,0 @@
|
||||
From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for
|
||||
iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:11 2023 -0400
|
||||
|
||||
bcm2835_property: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from bcm2835_property to
|
||||
bcm2835_mbox and back into bcm2835_property, mark iomem as
|
||||
reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/misc/bcm2835_property.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
|
||||
index 890ae7bae5..de056ea2df 100644
|
||||
--- a/hw/misc/bcm2835_property.c
|
||||
+++ b/hw/misc/bcm2835_property.c
|
||||
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)
|
||||
|
||||
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
|
||||
TYPE_BCM2835_PROPERTY, 0x10);
|
||||
+
|
||||
+ /*
|
||||
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
|
||||
+ * iomem. As such, mark iomem as re-entracy safe.
|
||||
+ */
|
||||
+ s->iomem.disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
|
||||
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,354 +0,0 @@
|
||||
From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:16 +0200
|
||||
Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
When processing vectored guest requests that are not aligned to the
|
||||
storage request alignment, we pad them by adding head and/or tail
|
||||
buffers for a read-modify-write cycle.
|
||||
|
||||
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
|
||||
with this padding, the vector can exceed that limit. As of
|
||||
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
|
||||
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
|
||||
limit, instead returning an error to the guest.
|
||||
|
||||
To the guest, this appears as a random I/O error. We should not return
|
||||
an I/O error to the guest when it issued a perfectly valid request.
|
||||
|
||||
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
|
||||
longer than IOV_MAX, which generally seems to work (because the guest
|
||||
assumes a smaller alignment than we really have, file-posix's
|
||||
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
|
||||
so emulate the request, so that the IOV_MAX does not matter). However,
|
||||
that does not seem exactly great.
|
||||
|
||||
I see two ways to fix this problem:
|
||||
1. We split such long requests into two requests.
|
||||
2. We join some elements of the vector into new buffers to make it
|
||||
shorter.
|
||||
|
||||
I am wary of (1), because it seems like it may have unintended side
|
||||
effects.
|
||||
|
||||
(2) on the other hand seems relatively simple to implement, with
|
||||
hopefully few side effects, so this patch does that.
|
||||
|
||||
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
|
||||
is effectively replaced by the new function bdrv_create_padded_qiov(),
|
||||
which not only wraps the request IOV with padding head/tail, but also
|
||||
ensures that the resulting vector will not have more than IOV_MAX
|
||||
elements. Putting that functionality into qemu_iovec_init_extended() is
|
||||
infeasible because it requires allocating a bounce buffer; doing so
|
||||
would require many more parameters (buffer alignment, how to initialize
|
||||
the buffer, and out parameters like the buffer, its length, and the
|
||||
original elements), which is not reasonable.
|
||||
|
||||
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
|
||||
functionality into bdrv_create_padded_qiov() by using public
|
||||
qemu_iovec_* functions, so that is what this patch does.
|
||||
|
||||
Because bdrv_pad_request() was the only "serious" user of
|
||||
qemu_iovec_init_extended(), the next patch will remove the latter
|
||||
function, so the functionality is not implemented twice.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 151 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 2e267a85ab..4e8e90208b 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1439,6 +1439,14 @@ out:
|
||||
* @merge_reads is true for small requests,
|
||||
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
* head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ *
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
|
||||
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
|
||||
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
|
||||
+ * I/O vector elements so for read requests, the data can be copied back after
|
||||
+ * the read is done.
|
||||
*/
|
||||
typedef struct BdrvRequestPadding {
|
||||
uint8_t *buf;
|
||||
@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding {
|
||||
size_t head;
|
||||
size_t tail;
|
||||
bool merge_reads;
|
||||
+ bool write;
|
||||
QEMUIOVector local_qiov;
|
||||
+
|
||||
+ uint8_t *collapse_bounce_buf;
|
||||
+ size_t collapse_len;
|
||||
+ QEMUIOVector pre_collapse_qiov;
|
||||
} BdrvRequestPadding;
|
||||
|
||||
static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad)
|
||||
{
|
||||
int64_t align = bs->bl.request_alignment;
|
||||
@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
}
|
||||
|
||||
+ pad->write = write;
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1547,8 +1563,23 @@ zero_mem:
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+/**
|
||||
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
|
||||
+ */
|
||||
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
|
||||
{
|
||||
+ if (pad->collapse_bounce_buf) {
|
||||
+ if (!pad->write) {
|
||||
+ /*
|
||||
+ * If padding required elements in the vector to be collapsed into a
|
||||
+ * bounce buffer, copy the bounce buffer content back
|
||||
+ */
|
||||
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_vfree(pad->collapse_bounce_buf);
|
||||
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
|
||||
+ }
|
||||
if (pad->buf) {
|
||||
qemu_vfree(pad->buf);
|
||||
qemu_iovec_destroy(&pad->local_qiov);
|
||||
@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
memset(pad, 0, sizeof(*pad));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
|
||||
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
|
||||
+ *
|
||||
+ * To ensure this, when necessary, the first two or three elements of @iov are
|
||||
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
|
||||
+ * bounce buffer in pad->local_qiov.
|
||||
+ *
|
||||
+ * After performing a read request, the data from the bounce buffer must be
|
||||
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
|
||||
+ */
|
||||
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ struct iovec *iov, int niov,
|
||||
+ size_t iov_offset, size_t bytes)
|
||||
+{
|
||||
+ int padded_niov, surplus_count, collapse_count;
|
||||
+
|
||||
+ /* Assert this invariant */
|
||||
+ assert(niov <= IOV_MAX);
|
||||
+
|
||||
+ /*
|
||||
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
|
||||
+ * to the guest is not ideal, but there is little else we can do. At least
|
||||
+ * this will practically never happen on 64-bit systems.
|
||||
+ */
|
||||
+ if (SIZE_MAX - pad->head < bytes ||
|
||||
+ SIZE_MAX - pad->head - bytes < pad->tail)
|
||||
+ {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Length of the resulting IOV if we just concatenated everything */
|
||||
+ padded_niov = !!pad->head + niov + !!pad->tail;
|
||||
+
|
||||
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
|
||||
+ * Instead, merge the first two or three elements of @iov to reduce the
|
||||
+ * number of vector elements as necessary.
|
||||
+ */
|
||||
+ if (padded_niov > IOV_MAX) {
|
||||
+ /*
|
||||
+ * Only head and tail can have lead to the number of entries exceeding
|
||||
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
|
||||
+ * to reduce the number of elements by `surplus_count`, so we merge that
|
||||
+ * many elements plus one into one element.
|
||||
+ */
|
||||
+ surplus_count = padded_niov - IOV_MAX;
|
||||
+ assert(surplus_count <= !!pad->head + !!pad->tail);
|
||||
+ collapse_count = surplus_count + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
|
||||
+ * advance `iov` (and associated variables) by those elements.
|
||||
+ */
|
||||
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
|
||||
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
|
||||
+ collapse_count, iov_offset, SIZE_MAX);
|
||||
+ iov += collapse_count;
|
||||
+ iov_offset = 0;
|
||||
+ niov -= collapse_count;
|
||||
+ bytes -= pad->pre_collapse_qiov.size;
|
||||
+
|
||||
+ /*
|
||||
+ * Construct the bounce buffer to match the length of the to-collapse
|
||||
+ * vector elements, and for write requests, initialize it with the data
|
||||
+ * from those elements. Then add it to `pad->local_qiov`.
|
||||
+ */
|
||||
+ pad->collapse_len = pad->pre_collapse_qiov.size;
|
||||
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
|
||||
+ if (pad->write) {
|
||||
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bdrv_pad_request
|
||||
*
|
||||
@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
* read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
* needed.
|
||||
*
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
|
||||
* - on function start they represent original request
|
||||
* - on failure or when padding is not needed they are unchanged
|
||||
@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad, bool *padded,
|
||||
BdrvRequestFlags *flags)
|
||||
{
|
||||
int ret;
|
||||
+ struct iovec *sliced_iov;
|
||||
+ int sliced_niov;
|
||||
+ size_t sliced_head, sliced_tail;
|
||||
|
||||
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
|
||||
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
*padded = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
- *qiov, *qiov_offset, *bytes,
|
||||
- pad->buf + pad->buf_len - pad->tail,
|
||||
- pad->tail);
|
||||
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
|
||||
+ &sliced_head, &sliced_tail,
|
||||
+ &sliced_niov);
|
||||
+
|
||||
+ /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ assert(*bytes <= SIZE_MAX);
|
||||
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
+ sliced_head, *bytes);
|
||||
if (ret < 0) {
|
||||
- bdrv_padding_destroy(pad);
|
||||
+ bdrv_padding_finalize(pad);
|
||||
return ret;
|
||||
}
|
||||
*bytes += pad->head + pad->tail;
|
||||
@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- NULL, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
|
||||
+ &pad, NULL, &flags);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
bs->bl.request_alignment,
|
||||
qiov, qiov_offset, flags);
|
||||
tracked_request_end(&req);
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
fail:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
/* This flag doesn't make sense for padding or zero writes */
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
|
||||
if (padding) {
|
||||
assert(!(flags & BDRV_REQ_NO_WAIT));
|
||||
bdrv_make_request_serialising(req, align);
|
||||
@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
|
||||
}
|
||||
|
||||
out:
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- &padded, &flags);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
|
||||
+ &pad, &padded, &flags);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
qiov, qiov_offset, flags);
|
||||
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:34 +0200
|
||||
Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in
|
||||
qmp_block_resize()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
This QMP handler runs in a coroutine, so it must use the corresponding
|
||||
no_co_wrappers instead.
|
||||
|
||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
blockdev.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index d7b5c18f0a..eb509cf964 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
|
||||
if (!blk) {
|
||||
return;
|
||||
}
|
||||
@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
|
||||
bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
bdrv_co_unlock(bs);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,73 +0,0 @@
|
||||
From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 14 Jul 2023 10:59:38 +0200
|
||||
Subject: [PATCH 5/9] block: Fix pad_request's request restriction
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2174676
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
|
||||
which bdrv_check_qiov_request() does not guarantee.
|
||||
|
||||
bdrv_check_request32() however will guarantee this, and both of
|
||||
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
|
||||
bdrv_co_pwritev_part()) already run it before calling
|
||||
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
|
||||
bdrv_check_request32() without expecting error, too.
|
||||
|
||||
In effect, this patch will not change guest-visible behavior. It is a
|
||||
clean-up to tighten a condition to match what is guaranteed by our
|
||||
callers, and which exists purely to show clearly why the subsequent
|
||||
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
|
||||
|
||||
Note there is a difference between the interfaces of
|
||||
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
|
||||
an errp, the latter does not, so we can no longer just pass
|
||||
&error_abort. Instead, we need to check the returned value. While we
|
||||
do expect success (because the callers have already run this function),
|
||||
an assert(ret == 0) is not much simpler than just returning an error if
|
||||
it occurs, so let us handle errors by returning them up the stack now.
|
||||
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-id: 20230714085938.202730-1-hreitz@redhat.com
|
||||
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
|
||||
("block: Collapse padded I/O vecs exceeding IOV_MAX")
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/io.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 4e8e90208b..807c9fb720 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
int sliced_niov;
|
||||
size_t sliced_head, sliced_tail;
|
||||
|
||||
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
+ /* Should have been checked by the caller already */
|
||||
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
&sliced_head, &sliced_tail,
|
||||
&sliced_niov);
|
||||
|
||||
- /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ /* Guaranteed by bdrv_check_request32() */
|
||||
assert(*bytes <= SIZE_MAX);
|
||||
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
sliced_head, *bytes);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,252 @@
|
||||
From 2ee645a339e9ef9cd92620a8b784d18d512326be Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 25 Apr 2024 14:56:02 +0200
|
||||
Subject: [PATCH 4/4] block: Parse filenames only when explicitly requested
|
||||
|
||||
RH-Author: Hana Czenczek <hczenczek@redhat.com>
|
||||
RH-MergeRequest: 1: CVE 2024-4467 (PRDSC)
|
||||
RH-Jira: RHEL-35611
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Commit: [4/4] f44c2941d4419e60f16dea3e9adca164e75aa78d
|
||||
|
||||
When handling image filenames from legacy options such as -drive or from
|
||||
tools, these filenames are parsed for protocol prefixes, including for
|
||||
the json:{} pseudo-protocol.
|
||||
|
||||
This behaviour is intended for filenames that come directly from the
|
||||
command line and for backing files, which may come from the image file
|
||||
itself. Higher level management tools generally take care to verify that
|
||||
untrusted images don't contain a bad (or any) backing file reference;
|
||||
'qemu-img info' is a suitable tool for this.
|
||||
|
||||
However, for other files that can be referenced in images, such as
|
||||
qcow2 data files or VMDK extents, the string from the image file is
|
||||
usually not verified by management tools - and 'qemu-img info' wouldn't
|
||||
be suitable because in contrast to backing files, it already opens these
|
||||
other referenced files. So here the string should be interpreted as a
|
||||
literal local filename. More complex configurations need to be specified
|
||||
explicitly on the command line or in QMP.
|
||||
|
||||
This patch changes bdrv_open_inherit() so that it only parses filenames
|
||||
if a new parameter parse_filename is true. It is set for the top level
|
||||
in bdrv_open(), for the file child and for the backing file child. All
|
||||
other callers pass false and disable filename parsing this way.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Upstream: N/A, embargoed
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block.c | 90 ++++++++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 57 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 468cf5e67d..50bdd197b7 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -86,6 +86,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
BlockDriverState *parent,
|
||||
const BdrvChildClass *child_class,
|
||||
BdrvChildRole child_role,
|
||||
+ bool parse_filename,
|
||||
Error **errp);
|
||||
|
||||
static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
@@ -2058,7 +2059,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
|
||||
* block driver has been specified explicitly.
|
||||
*/
|
||||
static int bdrv_fill_options(QDict **options, const char *filename,
|
||||
- int *flags, Error **errp)
|
||||
+ int *flags, bool allow_parse_filename,
|
||||
+ Error **errp)
|
||||
{
|
||||
const char *drvname;
|
||||
bool protocol = *flags & BDRV_O_PROTOCOL;
|
||||
@@ -2100,7 +2102,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
|
||||
if (protocol && filename) {
|
||||
if (!qdict_haskey(*options, "filename")) {
|
||||
qdict_put_str(*options, "filename", filename);
|
||||
- parse_filename = true;
|
||||
+ parse_filename = allow_parse_filename;
|
||||
} else {
|
||||
error_setg(errp, "Can't specify 'file' and 'filename' options at "
|
||||
"the same time");
|
||||
@@ -3663,7 +3665,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||
}
|
||||
|
||||
backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
|
||||
- &child_of_bds, bdrv_backing_role(bs), errp);
|
||||
+ &child_of_bds, bdrv_backing_role(bs), true,
|
||||
+ errp);
|
||||
if (!backing_hd) {
|
||||
bs->open_flags |= BDRV_O_NO_BACKING;
|
||||
error_prepend(errp, "Could not open backing file: ");
|
||||
@@ -3697,7 +3700,8 @@ free_exit:
|
||||
static BlockDriverState *
|
||||
bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
|
||||
BlockDriverState *parent, const BdrvChildClass *child_class,
|
||||
- BdrvChildRole child_role, bool allow_none, Error **errp)
|
||||
+ BdrvChildRole child_role, bool allow_none,
|
||||
+ bool parse_filename, Error **errp)
|
||||
{
|
||||
BlockDriverState *bs = NULL;
|
||||
QDict *image_options;
|
||||
@@ -3728,7 +3732,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
|
||||
}
|
||||
|
||||
bs = bdrv_open_inherit(filename, reference, image_options, 0,
|
||||
- parent, child_class, child_role, errp);
|
||||
+ parent, child_class, child_role, parse_filename,
|
||||
+ errp);
|
||||
if (!bs) {
|
||||
goto done;
|
||||
}
|
||||
@@ -3738,6 +3743,33 @@ done:
|
||||
return bs;
|
||||
}
|
||||
|
||||
+static BdrvChild *bdrv_open_child_common(const char *filename,
|
||||
+ QDict *options, const char *bdref_key,
|
||||
+ BlockDriverState *parent,
|
||||
+ const BdrvChildClass *child_class,
|
||||
+ BdrvChildRole child_role,
|
||||
+ bool allow_none, bool parse_filename,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockDriverState *bs;
|
||||
+ BdrvChild *child;
|
||||
+
|
||||
+ GLOBAL_STATE_CODE();
|
||||
+
|
||||
+ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
|
||||
+ child_role, allow_none, parse_filename, errp);
|
||||
+ if (bs == NULL) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ bdrv_graph_wrlock();
|
||||
+ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
|
||||
+ errp);
|
||||
+ bdrv_graph_wrunlock();
|
||||
+
|
||||
+ return child;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Opens a disk image whose options are given as BlockdevRef in another block
|
||||
* device's options.
|
||||
@@ -3761,27 +3793,15 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
BdrvChildRole child_role,
|
||||
bool allow_none, Error **errp)
|
||||
{
|
||||
- BlockDriverState *bs;
|
||||
- BdrvChild *child;
|
||||
-
|
||||
- GLOBAL_STATE_CODE();
|
||||
-
|
||||
- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
|
||||
- child_role, allow_none, errp);
|
||||
- if (bs == NULL) {
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
- bdrv_graph_wrlock();
|
||||
- child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
|
||||
- errp);
|
||||
- bdrv_graph_wrunlock();
|
||||
-
|
||||
- return child;
|
||||
+ return bdrv_open_child_common(filename, options, bdref_key, parent,
|
||||
+ child_class, child_role, allow_none, false,
|
||||
+ errp);
|
||||
}
|
||||
|
||||
/*
|
||||
- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
+ * This does mostly the same as bdrv_open_child(), but for opening the primary
|
||||
+ * child of a node. A notable difference from bdrv_open_child() is that it
|
||||
+ * enables filename parsing for protocol names (including json:).
|
||||
*
|
||||
* @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
@@ -3796,8 +3816,8 @@ int bdrv_open_file_child(const char *filename,
|
||||
role = parent->drv->is_filter ?
|
||||
(BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
|
||||
|
||||
- if (!bdrv_open_child(filename, options, bdref_key, parent,
|
||||
- &child_of_bds, role, false, errp))
|
||||
+ if (!bdrv_open_child_common(filename, options, bdref_key, parent,
|
||||
+ &child_of_bds, role, false, true, errp))
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -3842,7 +3862,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
|
||||
|
||||
}
|
||||
|
||||
- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
|
||||
+ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false,
|
||||
+ errp);
|
||||
obj = NULL;
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
@@ -3932,7 +3953,7 @@ static BlockDriverState * no_coroutine_fn
|
||||
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
int flags, BlockDriverState *parent,
|
||||
const BdrvChildClass *child_class, BdrvChildRole child_role,
|
||||
- Error **errp)
|
||||
+ bool parse_filename, Error **errp)
|
||||
{
|
||||
int ret;
|
||||
BlockBackend *file = NULL;
|
||||
@@ -3980,9 +4001,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
}
|
||||
|
||||
/* json: syntax counts as explicit options, as if in the QDict */
|
||||
- parse_json_protocol(options, &filename, &local_err);
|
||||
- if (local_err) {
|
||||
- goto fail;
|
||||
+ if (parse_filename) {
|
||||
+ parse_json_protocol(options, &filename, &local_err);
|
||||
+ if (local_err) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
}
|
||||
|
||||
bs->explicit_options = qdict_clone_shallow(options);
|
||||
@@ -4007,7 +4030,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
parent->open_flags, parent->options);
|
||||
}
|
||||
|
||||
- ret = bdrv_fill_options(&options, filename, &flags, &local_err);
|
||||
+ ret = bdrv_fill_options(&options, filename, &flags, parse_filename,
|
||||
+ &local_err);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -4076,7 +4100,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
|
||||
file_bs = bdrv_open_child_bs(filename, options, "file", bs,
|
||||
&child_of_bds, BDRV_CHILD_IMAGE,
|
||||
- true, &local_err);
|
||||
+ true, true, &local_err);
|
||||
if (local_err) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -4225,7 +4249,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
return bdrv_open_inherit(filename, reference, options, flags, NULL,
|
||||
- NULL, 0, errp);
|
||||
+ NULL, 0, true, errp);
|
||||
}
|
||||
|
||||
/* Return true if the NULL-terminated @list contains @str */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,386 +0,0 @@
|
||||
From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 4 May 2023 13:57:33 +0200
|
||||
Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine
|
||||
context
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize()
|
||||
RH-Bugzilla: 2185688
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
These functions must not be called in coroutine context, because they
|
||||
need write access to the graph.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 2 +-
|
||||
block/crypto.c | 6 +++---
|
||||
block/parallels.c | 6 +++---
|
||||
block/qcow.c | 6 +++---
|
||||
block/qcow2.c | 14 +++++++-------
|
||||
block/qed.c | 6 +++---
|
||||
block/vdi.c | 6 +++---
|
||||
block/vhdx.c | 6 +++---
|
||||
block/vmdk.c | 18 +++++++++---------
|
||||
block/vpc.c | 6 +++---
|
||||
include/block/block-global-state.h | 3 ++-
|
||||
include/sysemu/block-backend-global-state.h | 5 ++++-
|
||||
12 files changed, 44 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index d79a52ca74..a48112f945 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index ca67289187..8fd3ad0054 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
|
||||
ret = 0;
|
||||
cleanup:
|
||||
qcrypto_block_free(crypto);
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
fail:
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -730,7 +730,7 @@ fail:
|
||||
bdrv_co_delete_file_noerr(bs);
|
||||
}
|
||||
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_QCryptoBlockCreateOptions(create_opts);
|
||||
qobject_unref(cryptoopts);
|
||||
return ret;
|
||||
diff --git a/block/parallels.c b/block/parallels.c
|
||||
index 013684801a..b49c35929e 100644
|
||||
--- a/block/parallels.c
|
||||
+++ b/block/parallels.c
|
||||
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
|
||||
exit:
|
||||
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index 490e4f819e..a0c701f578 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
|
||||
g_free(tmp);
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(qcow_blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(qcow_blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
qcrypto_block_free(crypto);
|
||||
return ret;
|
||||
}
|
||||
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
fail:
|
||||
g_free(backing_fmt);
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 22084730f9..0b8beb8b47 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/*
|
||||
@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
|
||||
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
||||
@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3949,8 +3949,8 @@ finish:
|
||||
}
|
||||
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
- bdrv_unref(data_bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
+ bdrv_co_unref(data_bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 0705a7b4e2..aff2a2076e 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
|
||||
ret = 0; /* success */
|
||||
out:
|
||||
g_free(l1_table);
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vdi.c b/block/vdi.c
|
||||
index f2434d6153..08331d2dd7 100644
|
||||
--- a/block/vdi.c
|
||||
+++ b/block/vdi.c
|
||||
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs_file);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
g_free(bmap);
|
||||
return ret;
|
||||
}
|
||||
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
done:
|
||||
qobject_unref(qdict);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
- bdrv_unref(bs_file);
|
||||
+ bdrv_co_unref(bs_file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/block/vhdx.c b/block/vhdx.c
|
||||
index 81420722a1..00777da91a 100644
|
||||
--- a/block/vhdx.c
|
||||
+++ b/block/vhdx.c
|
||||
@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
|
||||
|
||||
ret = 0;
|
||||
delete_and_exit:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
g_free(creator);
|
||||
return ret;
|
||||
}
|
||||
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index f5f49018fe..01ca13c82b 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -2306,7 +2306,7 @@ exit:
|
||||
if (pbb) {
|
||||
*pbb = blk;
|
||||
} else {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
|
||||
if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
|
||||
error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
|
||||
blk_bs(backing)->drv->format_name);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
|
||||
- blk_unref(backing);
|
||||
+ blk_co_unref(backing);
|
||||
if (ret) {
|
||||
error_setg(errp, "Failed to read parent CID");
|
||||
goto exit;
|
||||
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
|
||||
blk_bs(extent_blk)->filename);
|
||||
created_size += cur_size;
|
||||
extent_idx++;
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
}
|
||||
|
||||
/* Check whether we got excess extents */
|
||||
extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
|
||||
opaque, NULL);
|
||||
if (extent_blk) {
|
||||
- blk_unref(extent_blk);
|
||||
+ blk_co_unref(extent_blk);
|
||||
error_setg(errp, "List of extents contains unused extents");
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
|
||||
ret = 0;
|
||||
exit:
|
||||
if (blk) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
}
|
||||
g_free(desc);
|
||||
g_free(parent_desc_line);
|
||||
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
|
||||
errp)) {
|
||||
goto exit;
|
||||
}
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
exit:
|
||||
g_free(ext_filename);
|
||||
return blk;
|
||||
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
|
||||
return NULL;
|
||||
}
|
||||
blk_set_allow_write_beyond_eof(blk, true);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
|
||||
if (size != -1) {
|
||||
ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
|
||||
if (ret) {
|
||||
- blk_unref(blk);
|
||||
+ blk_co_unref(blk);
|
||||
blk = NULL;
|
||||
}
|
||||
}
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index b89b0ff8e2..07ddda5b99 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
|
||||
}
|
||||
|
||||
out:
|
||||
- blk_unref(blk);
|
||||
- bdrv_unref(bs);
|
||||
+ blk_co_unref(blk);
|
||||
+ bdrv_co_unref(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
|
||||
fail:
|
||||
qobject_unref(qdict);
|
||||
- bdrv_unref(bs);
|
||||
+ bdrv_co_unref(bs);
|
||||
qapi_free_BlockdevCreateOptions(create_options);
|
||||
return ret;
|
||||
}
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 399200a9a3..cd4ea554bf 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
|
||||
bool quiet, Error **errp);
|
||||
|
||||
void bdrv_ref(BlockDriverState *bs);
|
||||
-void bdrv_unref(BlockDriverState *bs);
|
||||
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
|
||||
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
|
||||
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
|
||||
index 2b6d27db7c..fa83f9389c 100644
|
||||
--- a/include/sysemu/block-backend-global-state.h
|
||||
+++ b/include/sysemu/block-backend-global-state.h
|
||||
@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options,
|
||||
|
||||
int blk_get_refcnt(BlockBackend *blk);
|
||||
void blk_ref(BlockBackend *blk);
|
||||
-void blk_unref(BlockBackend *blk);
|
||||
+
|
||||
+void no_coroutine_fn blk_unref(BlockBackend *blk);
|
||||
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
|
||||
+
|
||||
void blk_remove_all_bs(void);
|
||||
BlockBackend *blk_by_name(const char *name);
|
||||
BlockBackend *blk_next(BlockBackend *blk);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,74 +0,0 @@
|
||||
From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Wed, 26 Jul 2023 09:48:07 +0200
|
||||
Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() in blkio_virtio_blk_common_open() is used to open the
|
||||
character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in
|
||||
the future eventually the unix socket.
|
||||
|
||||
In all these cases we cannot open the path in read-only mode,
|
||||
when the `read-only` option of blockdev is on, because the exchange
|
||||
of IOCTL commands for example will fail.
|
||||
|
||||
In order to open the device read-only, we have to use the `read-only`
|
||||
property of the libblkio driver as we already do in blkio_file_open().
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20230726074807.14041-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 21 ++++++++++++---------
|
||||
1 file changed, 12 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 3ea9841bd8..5a82c6cb1a 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
* layer through the "/dev/fdset/N" special path.
|
||||
*/
|
||||
if (fd_supported) {
|
||||
- int open_flags;
|
||||
-
|
||||
- if (flags & BDRV_O_RDWR) {
|
||||
- open_flags = O_RDWR;
|
||||
- } else {
|
||||
- open_flags = O_RDONLY;
|
||||
- }
|
||||
-
|
||||
- fd = qemu_open(path, open_flags, errp);
|
||||
+ /*
|
||||
+ * `path` can contain the path of a character device
|
||||
+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
|
||||
+ *
|
||||
+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
|
||||
+ * is not set in the open flags, because the exchange of IOCTL commands
|
||||
+ * for example will fail.
|
||||
+ *
|
||||
+ * In order to open the device read-only, we are using the `read-only`
|
||||
+ * property of the libblkio driver in blkio_file_open().
|
||||
+ */
|
||||
+ fd = qemu_open(path, O_RDWR, errp);
|
||||
if (fd < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,54 +0,0 @@
|
||||
From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 12:37:44 +0200
|
||||
Subject: [PATCH 01/14] block/blkio: enable the completion eventfd
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Until libblkio 1.3.0, virtio-blk drivers had completion eventfd
|
||||
notifications enabled from the start, but from the next releases
|
||||
this is no longer the case, so we have to explicitly enable them.
|
||||
|
||||
In fact, the libblkio documentation says they could be disabled,
|
||||
so we should always enable them at the start if we want to be
|
||||
sure to get completion eventfd notifications:
|
||||
|
||||
By default, the driver might not generate completion events for
|
||||
requests so it is necessary to explicitly enable the completion
|
||||
file descriptor before use:
|
||||
|
||||
void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable);
|
||||
|
||||
I discovered this while trying a development version of libblkio:
|
||||
the guest kernel hangs during boot, while probing the device.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230725103744.77343-1-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index afcec359f2..3ea9841bd8 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
QLIST_INIT(&s->bounce_bufs);
|
||||
s->blkioq = blkio_get_queue(s->blkio, 0);
|
||||
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
|
||||
+ blkioq_set_completion_fd_enabled(s->blkioq, true);
|
||||
|
||||
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
||||
return 0;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:19 +0200
|
||||
Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd`
|
||||
setting fails
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
qemu_open() fails if called with a unix domain socket in this way:
|
||||
-blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address
|
||||
|
||||
Since virtio-blk-vhost-user does not support fd passing, let's always fall back
|
||||
on using `path` if we fail the fd passing.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-4-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 93a8f8fc5c..eef80e9ce5 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
* In order to open the device read-only, we are using the `read-only`
|
||||
* property of the libblkio driver in blkio_file_open().
|
||||
*/
|
||||
- fd = qemu_open(path, O_RDWR, errp);
|
||||
+ fd = qemu_open(path, O_RDWR, NULL);
|
||||
if (fd < 0) {
|
||||
- return -EINVAL;
|
||||
+ fd_supported = false;
|
||||
+ } else {
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ fd_supported = false;
|
||||
+ qemu_close(fd);
|
||||
+ }
|
||||
}
|
||||
+ }
|
||||
|
||||
- ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
- blkio_get_error_msg());
|
||||
- qemu_close(fd);
|
||||
- return ret;
|
||||
- }
|
||||
- } else {
|
||||
+ if (!fd_supported) {
|
||||
ret = blkio_set_str(s->blkio, "path", path);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,205 +0,0 @@
|
||||
From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 4 Jul 2023 14:34:36 +0200
|
||||
Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 181: block/blkio: fix module_block.py parsing
|
||||
RH-Bugzilla: 2213317
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
When QEMU is built with --enable-modules, the module_block.py script
|
||||
parses block/*.c to find block drivers that are built as modules. The
|
||||
script generates a table of block drivers called block_driver_modules[].
|
||||
This table is used for block driver module loading.
|
||||
|
||||
The blkio.c driver uses macros to define its BlockDriver structs. This
|
||||
was done to avoid code duplication but the module_block.py script is
|
||||
unable to parse the macro. The result is that libblkio-based block
|
||||
drivers can be built as modules but will not be found at runtime.
|
||||
|
||||
One fix is to make the module_block.py script or build system fancier so
|
||||
it can parse C macros (e.g. by parsing the preprocessed source code). I
|
||||
chose not to do this because it raises the complexity of the build,
|
||||
making future issues harder to debug.
|
||||
|
||||
Keep things simple: use the macro to avoid duplicating BlockDriver
|
||||
function pointers but define .format_name and .protocol_name manually
|
||||
for each BlockDriver. This way the module_block.py is able to parse the
|
||||
code.
|
||||
|
||||
Also get rid of the block driver name macros (e.g. DRIVER_IO_URING)
|
||||
because module_block.py cannot parse them either.
|
||||
|
||||
Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230704123436.187761-1-stefanha@redhat.com
|
||||
Cc: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9)
|
||||
|
||||
Conflicts:
|
||||
- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to
|
||||
blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/blkio.c | 118 ++++++++++++++++++++++++++------------------------
|
||||
1 file changed, 61 insertions(+), 57 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 6a6f20f923..afcec359f2 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -21,16 +21,6 @@
|
||||
|
||||
#include "block/block-io.h"
|
||||
|
||||
-/*
|
||||
- * Keep the QEMU BlockDriver names identical to the libblkio driver names.
|
||||
- * Using macros instead of typing out the string literals avoids typos.
|
||||
- */
|
||||
-#define DRIVER_IO_URING "io_uring"
|
||||
-#define DRIVER_NVME_IO_URING "nvme-io_uring"
|
||||
-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
|
||||
-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
|
||||
-
|
||||
/*
|
||||
* Allocated bounce buffers are kept in a list sorted by buffer address.
|
||||
*/
|
||||
@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
|
||||
* - truncate
|
||||
*/
|
||||
|
||||
-#define BLKIO_DRIVER(name, ...) \
|
||||
- { \
|
||||
- .format_name = name, \
|
||||
- .protocol_name = name, \
|
||||
- .instance_size = sizeof(BDRVBlkioState), \
|
||||
- .bdrv_file_open = blkio_file_open, \
|
||||
- .bdrv_close = blkio_close, \
|
||||
- .bdrv_co_getlength = blkio_co_getlength, \
|
||||
- .bdrv_co_truncate = blkio_truncate, \
|
||||
- .bdrv_co_get_info = blkio_co_get_info, \
|
||||
- .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
- .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
- .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
- .bdrv_co_preadv = blkio_co_preadv, \
|
||||
- .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
- .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
- .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
- .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
- .bdrv_register_buf = blkio_register_buf, \
|
||||
- .bdrv_unregister_buf = blkio_unregister_buf, \
|
||||
- __VA_ARGS__ \
|
||||
- }
|
||||
-
|
||||
-static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_IO_URING,
|
||||
- .bdrv_needs_filename = true,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
|
||||
- DRIVER_NVME_IO_URING,
|
||||
-);
|
||||
-
|
||||
-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VFIO_PCI
|
||||
-);
|
||||
+/*
|
||||
+ * Do not include .format_name and .protocol_name because module_block.py
|
||||
+ * does not parse macros in the source code.
|
||||
+ */
|
||||
+#define BLKIO_DRIVER_COMMON \
|
||||
+ .instance_size = sizeof(BDRVBlkioState), \
|
||||
+ .bdrv_file_open = blkio_file_open, \
|
||||
+ .bdrv_close = blkio_close, \
|
||||
+ .bdrv_co_getlength = blkio_co_getlength, \
|
||||
+ .bdrv_co_truncate = blkio_truncate, \
|
||||
+ .bdrv_co_get_info = blkio_co_get_info, \
|
||||
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
|
||||
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
|
||||
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
|
||||
+ .bdrv_co_preadv = blkio_co_preadv, \
|
||||
+ .bdrv_co_pwritev = blkio_co_pwritev, \
|
||||
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
|
||||
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
|
||||
+ .bdrv_co_io_unplug = blkio_co_io_unplug, \
|
||||
+ .bdrv_refresh_limits = blkio_refresh_limits, \
|
||||
+ .bdrv_register_buf = blkio_register_buf, \
|
||||
+ .bdrv_unregister_buf = blkio_unregister_buf,
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_USER
|
||||
-);
|
||||
+/*
|
||||
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
|
||||
+ * consistency.
|
||||
+ */
|
||||
|
||||
-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
|
||||
- DRIVER_VIRTIO_BLK_VHOST_VDPA
|
||||
-);
|
||||
+static BlockDriver bdrv_io_uring = {
|
||||
+ .format_name = "io_uring",
|
||||
+ .protocol_name = "io_uring",
|
||||
+ .bdrv_needs_filename = true,
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_nvme_io_uring = {
|
||||
+ .format_name = "nvme-io_uring",
|
||||
+ .protocol_name = "nvme-io_uring",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
|
||||
+ .format_name = "virtio-blk-vfio-pci",
|
||||
+ .protocol_name = "virtio-blk-vfio-pci",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_user = {
|
||||
+ .format_name = "virtio-blk-vhost-user",
|
||||
+ .protocol_name = "virtio-blk-vhost-user",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
+
|
||||
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
|
||||
+ .format_name = "virtio-blk-vhost-vdpa",
|
||||
+ .protocol_name = "virtio-blk-vhost-vdpa",
|
||||
+ BLKIO_DRIVER_COMMON
|
||||
+};
|
||||
|
||||
static void bdrv_blkio_init(void)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,151 +0,0 @@
|
||||
From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:17 +0200
|
||||
Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers
|
||||
functions
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
This is in preparation for the next patch, where for virtio-blk
|
||||
drivers we need to handle the failure of blkio_connect().
|
||||
|
||||
Let's also rename the *_open() functions to *_connect() to make
|
||||
the code reflect the changes applied.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 67 ++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 40 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 5a82c6cb1a..85d1eed5fb 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
-static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *filename = qdict_get_str(options, "filename");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
- Error **errp)
|
||||
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
- QDict *options, int flags, Error **errp)
|
||||
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
+ int flags, Error **errp)
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
qdict_del(options, "path");
|
||||
|
||||
return 0;
|
||||
@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
- ret = blkio_io_uring_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
- ret = blkio_nvme_io_uring(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
- ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
|
||||
- } else {
|
||||
- g_assert_not_reached();
|
||||
- }
|
||||
- if (ret < 0) {
|
||||
- blkio_destroy(&s->blkio);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_RDWR)) {
|
||||
ret = blkio_set_bool(s->blkio, "read-only", true);
|
||||
if (ret < 0) {
|
||||
@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
}
|
||||
|
||||
- ret = blkio_connect(s->blkio);
|
||||
+ if (strcmp(blkio_driver, "io_uring") == 0) {
|
||||
+ ret = blkio_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
|
||||
+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
|
||||
+ ret = blkio_virtio_blk_connect(bs, options, flags, errp);
|
||||
+ } else {
|
||||
+ g_assert_not_reached();
|
||||
+ }
|
||||
if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
- blkio_get_error_msg());
|
||||
blkio_destroy(&s->blkio);
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,85 +0,0 @@
|
||||
From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:18 +0200
|
||||
Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using
|
||||
`fd`
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa
|
||||
driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use
|
||||
qemu_open() to support fd passing for virtio-blk") we are using
|
||||
`blkio_get_int(..., "fd")` to check if the "fd" property is supported
|
||||
for all the virtio-blk-* drivers.
|
||||
|
||||
Unfortunately that property is also available for those drivers that do
|
||||
not support it, such as virtio-blk-vhost-user.
|
||||
|
||||
So, `blkio_get_int()` is not enough to check whether the driver supports
|
||||
the `fd` property or not. This is because the virtio-blk common libblkio
|
||||
driver only checks whether or not `fd` is set during `blkio_connect()`
|
||||
and fails with -EINVAL for those transports that do not support it
|
||||
(all except vhost-vdpa for now).
|
||||
|
||||
So let's handle the `blkio_connect()` failure, retrying it using `path`
|
||||
directly.
|
||||
|
||||
Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk")
|
||||
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-3-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 29 +++++++++++++++++++++++++++++
|
||||
1 file changed, 29 insertions(+)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 85d1eed5fb..93a8f8fc5c 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
|
||||
ret = blkio_connect(s->blkio);
|
||||
+ /*
|
||||
+ * If the libblkio driver doesn't support the `fd` property, blkio_connect()
|
||||
+ * will fail with -EINVAL. So let's try calling blkio_connect() again by
|
||||
+ * directly setting `path`.
|
||||
+ */
|
||||
+ if (fd_supported && ret == -EINVAL) {
|
||||
+ qemu_close(fd);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to clear the `fd` property we set previously by setting
|
||||
+ * it to -1.
|
||||
+ */
|
||||
+ ret = blkio_set_int(s->blkio, "fd", -1);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_connect(s->blkio);
|
||||
+ }
|
||||
+
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
|
||||
blkio_get_error_msg());
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,49 +0,0 @@
|
||||
From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Thu, 27 Jul 2023 18:10:20 +0200
|
||||
Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd
|
||||
support
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers
|
||||
RH-Bugzilla: 2225354 2225439
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Alberto Faria <None>
|
||||
RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Setting the `fd` property fails with virtio-blk-* libblkio drivers
|
||||
that do not support fd passing since
|
||||
https://gitlab.com/libblkio/libblkio/-/merge_requests/208.
|
||||
|
||||
Getting the `fd` property, on the other hand, always succeeds for
|
||||
virtio-blk-* libblkio drivers even when they don't support fd passing.
|
||||
|
||||
This patch switches to setting the `fd` property because it is a
|
||||
better mechanism for probing fd passing support than getting the `fd`
|
||||
property.
|
||||
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230727161020.84213-5-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index eef80e9ce5..8defbf744f 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ if (blkio_set_int(s->blkio, "fd", -1) == 0) {
|
||||
fd_supported = true;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,108 +0,0 @@
|
||||
From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Date: Tue, 30 May 2023 09:19:40 +0200
|
||||
Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for
|
||||
virtio-blk
|
||||
|
||||
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver
|
||||
RH-Bugzilla: 2180076
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s)
|
||||
|
||||
Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd
|
||||
passing. Let's expose this to the user, so the management layer
|
||||
can pass the file descriptor of an already opened path.
|
||||
|
||||
If the libblkio virtio-blk driver supports fd passing, let's always
|
||||
use qemu_open() to open the `path`, so we can handle fd passing
|
||||
from the management layer through the "/dev/fdset/N" special path.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68)
|
||||
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
---
|
||||
block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 44 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/block/blkio.c b/block/blkio.c
|
||||
index 0cdc99a729..6a6f20f923 100644
|
||||
--- a/block/blkio.c
|
||||
+++ b/block/blkio.c
|
||||
@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
|
||||
{
|
||||
const char *path = qdict_get_try_str(options, "path");
|
||||
BDRVBlkioState *s = bs->opaque;
|
||||
- int ret;
|
||||
+ bool fd_supported = false;
|
||||
+ int fd, ret;
|
||||
|
||||
if (!path) {
|
||||
error_setg(errp, "missing 'path' option");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- ret = blkio_set_str(s->blkio, "path", path);
|
||||
- qdict_del(options, "path");
|
||||
- if (ret < 0) {
|
||||
- error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
- blkio_get_error_msg());
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
if (!(flags & BDRV_O_NOCACHE)) {
|
||||
error_setg(errp, "cache.direct=off is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
+
|
||||
+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
|
||||
+ fd_supported = true;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If the libblkio driver supports fd passing, let's always use qemu_open()
|
||||
+ * to open the `path`, so we can handle fd passing from the management
|
||||
+ * layer through the "/dev/fdset/N" special path.
|
||||
+ */
|
||||
+ if (fd_supported) {
|
||||
+ int open_flags;
|
||||
+
|
||||
+ if (flags & BDRV_O_RDWR) {
|
||||
+ open_flags = O_RDWR;
|
||||
+ } else {
|
||||
+ open_flags = O_RDONLY;
|
||||
+ }
|
||||
+
|
||||
+ fd = qemu_open(path, open_flags, errp);
|
||||
+ if (fd < 0) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ ret = blkio_set_int(s->blkio, "fd", fd);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set fd: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ qemu_close(fd);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = blkio_set_str(s->blkio, "path", path);
|
||||
+ if (ret < 0) {
|
||||
+ error_setg_errno(errp, -ret, "failed to set path: %s",
|
||||
+ blkio_get_error_msg());
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qdict_del(options, "path");
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,121 +0,0 @@
|
||||
From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 1 May 2023 13:34:43 -0400
|
||||
Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by
|
||||
default
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
reader_count() is a performance bottleneck because the global
|
||||
aio_context_list_lock mutex causes thread contention. Put this debugging
|
||||
assertion behind a new ./configure --enable-debug-graph-lock option and
|
||||
disable it by default.
|
||||
|
||||
The --enable-debug-graph-lock option is also enabled by the more general
|
||||
--enable-debug option.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 3 +++
|
||||
configure | 1 +
|
||||
meson.build | 2 ++
|
||||
meson_options.txt | 2 ++
|
||||
scripts/meson-buildoptions.sh | 4 ++++
|
||||
5 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 454c31e691..259a7a0bde 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
+ /* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
diff --git a/configure b/configure
|
||||
index 800b5850f4..a62a3e6be9 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -806,6 +806,7 @@ for opt do
|
||||
--enable-debug)
|
||||
# Enable debugging options that aren't excessively noisy
|
||||
debug_tcg="yes"
|
||||
+ meson_option_parse --enable-debug-graph-lock ""
|
||||
meson_option_parse --enable-debug-mutex ""
|
||||
meson_option_add -Doptimization=0
|
||||
fortify_source="no"
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..d964e741e7 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool
|
||||
have_coroutine_pool = false
|
||||
endif
|
||||
config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
|
||||
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
|
||||
config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
|
||||
config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
|
||||
config_host_data.set('CONFIG_GPROF', get_option('gprof'))
|
||||
@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')}
|
||||
summary_info += {'static build': config_host.has_key('CONFIG_STATIC')}
|
||||
summary_info += {'malloc trim support': has_malloc_trim}
|
||||
summary_info += {'membarrier': have_membarrier}
|
||||
+summary_info += {'debug graph lock': get_option('debug_graph_lock')}
|
||||
summary_info += {'debug stack usage': get_option('debug_stack_usage')}
|
||||
summary_info += {'mutex debugging': get_option('debug_mutex')}
|
||||
summary_info += {'memory allocator': get_option('malloc')}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..bc857fe68b 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false,
|
||||
description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
|
||||
option('coroutine_pool', type: 'boolean', value: true,
|
||||
description: 'coroutine freelist (better performance)')
|
||||
+option('debug_graph_lock', type: 'boolean', value: false,
|
||||
+ description: 'graph lock debugging support')
|
||||
option('debug_mutex', type: 'boolean', value: false,
|
||||
description: 'mutex debugging support')
|
||||
option('debug_stack_usage', type: 'boolean', value: false,
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..30e1f25259 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -21,6 +21,8 @@ meson_options_help() {
|
||||
printf "%s\n" ' QEMU'
|
||||
printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)'
|
||||
printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation'
|
||||
+ printf "%s\n" ' --enable-debug-graph-lock'
|
||||
+ printf "%s\n" ' graph lock debugging support'
|
||||
printf "%s\n" ' --enable-debug-mutex mutex debugging support'
|
||||
printf "%s\n" ' --enable-debug-stack-usage'
|
||||
printf "%s\n" ' measure coroutine stack usage'
|
||||
@@ -249,6 +251,8 @@ _meson_option_parse() {
|
||||
--datadir=*) quote_sh "-Ddatadir=$2" ;;
|
||||
--enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
|
||||
--disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
|
||||
+ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
|
||||
+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
|
||||
--enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
|
||||
--disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
|
||||
--enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,330 @@
|
||||
From a67edfb4b591acdffc5b4987601a30224376996f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 27 May 2024 11:58:50 -0400
|
||||
Subject: [PATCH 4/5] block/crypto: create ciphers on demand
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 251: block/crypto: create ciphers on demand
|
||||
RH-Jira: RHEL-36159
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/2] 22a4c87fef774cad98a6f5a79f27df50a208013d (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Ciphers are pre-allocated by qcrypto_block_init_cipher() depending on
|
||||
the given number of threads. The -device
|
||||
virtio-blk-pci,iothread-vq-mapping= feature allows users to assign
|
||||
multiple IOThreads to a virtio-blk device, but the association between
|
||||
the virtio-blk device and the block driver happens after the block
|
||||
driver is already open.
|
||||
|
||||
When the number of threads given to qcrypto_block_init_cipher() is
|
||||
smaller than the actual number of threads at runtime, the
|
||||
block->n_free_ciphers > 0 assertion in qcrypto_block_pop_cipher() can
|
||||
fail.
|
||||
|
||||
Get rid of qcrypto_block_init_cipher()'s n_threads argument and allocate
|
||||
ciphers on demand.
|
||||
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-36159
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240527155851.892885-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit af206c284e4c1b17cdfb0f17e898b288c0fc1751)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
crypto/block-luks.c | 3 +-
|
||||
crypto/block-qcow.c | 2 +-
|
||||
crypto/block.c | 111 ++++++++++++++++++++++++++------------------
|
||||
crypto/blockpriv.h | 12 +++--
|
||||
4 files changed, 78 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
|
||||
index 3ee928fb5a..3357852c0a 100644
|
||||
--- a/crypto/block-luks.c
|
||||
+++ b/crypto/block-luks.c
|
||||
@@ -1262,7 +1262,6 @@ qcrypto_block_luks_open(QCryptoBlock *block,
|
||||
luks->cipher_mode,
|
||||
masterkey,
|
||||
luks->header.master_key_len,
|
||||
- n_threads,
|
||||
errp) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1456,7 +1455,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
|
||||
/* Setup the block device payload encryption objects */
|
||||
if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg,
|
||||
luks_opts.cipher_mode, masterkey,
|
||||
- luks->header.master_key_len, 1, errp) < 0) {
|
||||
+ luks->header.master_key_len, errp) < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
|
||||
index 4d7cf36a8f..02305058e3 100644
|
||||
--- a/crypto/block-qcow.c
|
||||
+++ b/crypto/block-qcow.c
|
||||
@@ -75,7 +75,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
|
||||
ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128,
|
||||
QCRYPTO_CIPHER_MODE_CBC,
|
||||
keybuf, G_N_ELEMENTS(keybuf),
|
||||
- n_threads, errp);
|
||||
+ errp);
|
||||
if (ret < 0) {
|
||||
ret = -ENOTSUP;
|
||||
goto fail;
|
||||
diff --git a/crypto/block.c b/crypto/block.c
|
||||
index 506ea1d1a3..ba6d1cebc7 100644
|
||||
--- a/crypto/block.c
|
||||
+++ b/crypto/block.c
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
+#include "qemu/lockable.h"
|
||||
#include "blockpriv.h"
|
||||
#include "block-qcow.h"
|
||||
#include "block-luks.h"
|
||||
@@ -57,6 +58,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
|
||||
+ qemu_mutex_init(&block->mutex);
|
||||
+
|
||||
block->format = options->format;
|
||||
|
||||
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
|
||||
@@ -76,8 +79,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- qemu_mutex_init(&block->mutex);
|
||||
-
|
||||
return block;
|
||||
}
|
||||
|
||||
@@ -92,6 +93,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
|
||||
+ qemu_mutex_init(&block->mutex);
|
||||
+
|
||||
block->format = options->format;
|
||||
|
||||
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
|
||||
@@ -111,8 +114,6 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- qemu_mutex_init(&block->mutex);
|
||||
-
|
||||
return block;
|
||||
}
|
||||
|
||||
@@ -227,37 +228,42 @@ QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block)
|
||||
* This function is used only in test with one thread (it's safe to skip
|
||||
* pop/push interface), so it's enough to assert it here:
|
||||
*/
|
||||
- assert(block->n_ciphers <= 1);
|
||||
- return block->ciphers ? block->ciphers[0] : NULL;
|
||||
+ assert(block->max_free_ciphers <= 1);
|
||||
+ return block->free_ciphers ? block->free_ciphers[0] : NULL;
|
||||
}
|
||||
|
||||
|
||||
-static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block)
|
||||
+static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block,
|
||||
+ Error **errp)
|
||||
{
|
||||
- QCryptoCipher *cipher;
|
||||
-
|
||||
- qemu_mutex_lock(&block->mutex);
|
||||
-
|
||||
- assert(block->n_free_ciphers > 0);
|
||||
- block->n_free_ciphers--;
|
||||
- cipher = block->ciphers[block->n_free_ciphers];
|
||||
-
|
||||
- qemu_mutex_unlock(&block->mutex);
|
||||
+ /* Usually there is a free cipher available */
|
||||
+ WITH_QEMU_LOCK_GUARD(&block->mutex) {
|
||||
+ if (block->n_free_ciphers > 0) {
|
||||
+ block->n_free_ciphers--;
|
||||
+ return block->free_ciphers[block->n_free_ciphers];
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- return cipher;
|
||||
+ /* Otherwise allocate a new cipher */
|
||||
+ return qcrypto_cipher_new(block->alg, block->mode, block->key,
|
||||
+ block->nkey, errp);
|
||||
}
|
||||
|
||||
|
||||
static void qcrypto_block_push_cipher(QCryptoBlock *block,
|
||||
QCryptoCipher *cipher)
|
||||
{
|
||||
- qemu_mutex_lock(&block->mutex);
|
||||
+ QEMU_LOCK_GUARD(&block->mutex);
|
||||
|
||||
- assert(block->n_free_ciphers < block->n_ciphers);
|
||||
- block->ciphers[block->n_free_ciphers] = cipher;
|
||||
- block->n_free_ciphers++;
|
||||
+ if (block->n_free_ciphers == block->max_free_ciphers) {
|
||||
+ block->max_free_ciphers++;
|
||||
+ block->free_ciphers = g_renew(QCryptoCipher *,
|
||||
+ block->free_ciphers,
|
||||
+ block->max_free_ciphers);
|
||||
+ }
|
||||
|
||||
- qemu_mutex_unlock(&block->mutex);
|
||||
+ block->free_ciphers[block->n_free_ciphers] = cipher;
|
||||
+ block->n_free_ciphers++;
|
||||
}
|
||||
|
||||
|
||||
@@ -265,24 +271,31 @@ int qcrypto_block_init_cipher(QCryptoBlock *block,
|
||||
QCryptoCipherAlgorithm alg,
|
||||
QCryptoCipherMode mode,
|
||||
const uint8_t *key, size_t nkey,
|
||||
- size_t n_threads, Error **errp)
|
||||
+ Error **errp)
|
||||
{
|
||||
- size_t i;
|
||||
+ QCryptoCipher *cipher;
|
||||
|
||||
- assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers);
|
||||
+ assert(!block->free_ciphers && !block->max_free_ciphers &&
|
||||
+ !block->n_free_ciphers);
|
||||
|
||||
- block->ciphers = g_new0(QCryptoCipher *, n_threads);
|
||||
+ /* Stash away cipher parameters for qcrypto_block_pop_cipher() */
|
||||
+ block->alg = alg;
|
||||
+ block->mode = mode;
|
||||
+ block->key = g_memdup2(key, nkey);
|
||||
+ block->nkey = nkey;
|
||||
|
||||
- for (i = 0; i < n_threads; i++) {
|
||||
- block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp);
|
||||
- if (!block->ciphers[i]) {
|
||||
- qcrypto_block_free_cipher(block);
|
||||
- return -1;
|
||||
- }
|
||||
- block->n_ciphers++;
|
||||
- block->n_free_ciphers++;
|
||||
+ /*
|
||||
+ * Create a new cipher to validate the parameters now. This reduces the
|
||||
+ * chance of cipher creation failing at I/O time.
|
||||
+ */
|
||||
+ cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ g_free(block->key);
|
||||
+ block->key = NULL;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
+ qcrypto_block_push_cipher(block, cipher);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -291,19 +304,23 @@ void qcrypto_block_free_cipher(QCryptoBlock *block)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
- if (!block->ciphers) {
|
||||
+ g_free(block->key);
|
||||
+ block->key = NULL;
|
||||
+
|
||||
+ if (!block->free_ciphers) {
|
||||
return;
|
||||
}
|
||||
|
||||
- assert(block->n_ciphers == block->n_free_ciphers);
|
||||
+ /* All popped ciphers were eventually pushed back */
|
||||
+ assert(block->n_free_ciphers == block->max_free_ciphers);
|
||||
|
||||
- for (i = 0; i < block->n_ciphers; i++) {
|
||||
- qcrypto_cipher_free(block->ciphers[i]);
|
||||
+ for (i = 0; i < block->max_free_ciphers; i++) {
|
||||
+ qcrypto_cipher_free(block->free_ciphers[i]);
|
||||
}
|
||||
|
||||
- g_free(block->ciphers);
|
||||
- block->ciphers = NULL;
|
||||
- block->n_ciphers = block->n_free_ciphers = 0;
|
||||
+ g_free(block->free_ciphers);
|
||||
+ block->free_ciphers = NULL;
|
||||
+ block->max_free_ciphers = block->n_free_ciphers = 0;
|
||||
}
|
||||
|
||||
QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
|
||||
@@ -311,7 +328,7 @@ QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
|
||||
/* ivgen should be accessed under mutex. However, this function is used only
|
||||
* in test with one thread, so it's enough to assert it here:
|
||||
*/
|
||||
- assert(block->n_ciphers <= 1);
|
||||
+ assert(block->max_free_ciphers <= 1);
|
||||
return block->ivgen;
|
||||
}
|
||||
|
||||
@@ -446,7 +463,10 @@ int qcrypto_block_decrypt_helper(QCryptoBlock *block,
|
||||
Error **errp)
|
||||
{
|
||||
int ret;
|
||||
- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block);
|
||||
+ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen,
|
||||
&block->mutex, sectorsize, offset, buf,
|
||||
@@ -465,7 +485,10 @@ int qcrypto_block_encrypt_helper(QCryptoBlock *block,
|
||||
Error **errp)
|
||||
{
|
||||
int ret;
|
||||
- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block);
|
||||
+ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen,
|
||||
&block->mutex, sectorsize, offset, buf,
|
||||
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
|
||||
index 836f3b4726..4bf6043d5d 100644
|
||||
--- a/crypto/blockpriv.h
|
||||
+++ b/crypto/blockpriv.h
|
||||
@@ -32,8 +32,14 @@ struct QCryptoBlock {
|
||||
const QCryptoBlockDriver *driver;
|
||||
void *opaque;
|
||||
|
||||
- QCryptoCipher **ciphers;
|
||||
- size_t n_ciphers;
|
||||
+ /* Cipher parameters */
|
||||
+ QCryptoCipherAlgorithm alg;
|
||||
+ QCryptoCipherMode mode;
|
||||
+ uint8_t *key;
|
||||
+ size_t nkey;
|
||||
+
|
||||
+ QCryptoCipher **free_ciphers;
|
||||
+ size_t max_free_ciphers;
|
||||
size_t n_free_ciphers;
|
||||
QCryptoIVGen *ivgen;
|
||||
QemuMutex mutex;
|
||||
@@ -130,7 +136,7 @@ int qcrypto_block_init_cipher(QCryptoBlock *block,
|
||||
QCryptoCipherAlgorithm alg,
|
||||
QCryptoCipherMode mode,
|
||||
const uint8_t *key, size_t nkey,
|
||||
- size_t n_threads, Error **errp);
|
||||
+ Error **errp);
|
||||
|
||||
void qcrypto_block_free_cipher(QCryptoBlock *block);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit ef56ffbdd6b0605dc1e305611287b948c970e236
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:08 2023 -0400
|
||||
|
||||
checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
Advise authors to use the _guarded versions of the APIs, instead.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
scripts/checkpatch.pl | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
|
||||
index d768171dcf..eeaec436eb 100755
|
||||
--- a/scripts/checkpatch.pl
|
||||
+++ b/scripts/checkpatch.pl
|
||||
@@ -2865,6 +2865,14 @@ sub process {
|
||||
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
|
||||
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
|
||||
}
|
||||
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
|
||||
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
+# recommend aio_bh_new_guarded instead of aio_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
|
||||
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
# check for module_init(), use category-specific init macros explicitly please
|
||||
if ($line =~ /^module_init\s*\(/) {
|
||||
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,90 @@
|
||||
From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Thu, 29 Feb 2024 01:00:35 -0500
|
||||
Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and
|
||||
kvm_reset() in class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Different confidential VMs in different architectures all have the same
|
||||
need to perform their specific initialization (and maybe reset) steps
|
||||
with KVM. Currently each of them exposes individual *_kvm_init()
|
||||
functions and lets machine code or KVM code call them.
|
||||
|
||||
To facilitate the introduction of confidential guest technology from
|
||||
different x86 vendors, add two virtual functions, kvm_init() and kvm_reset()
|
||||
in ConfidentialGuestSupportClass, and expose two helper functions for
|
||||
invoking them.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++-
|
||||
1 file changed, 33 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h
|
||||
index ba2dd4b5df..e5b188cffb 100644
|
||||
--- a/include/exec/confidential-guest-support.h
|
||||
+++ b/include/exec/confidential-guest-support.h
|
||||
@@ -23,7 +23,10 @@
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
|
||||
-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT)
|
||||
+OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
|
||||
+ ConfidentialGuestSupportClass,
|
||||
+ CONFIDENTIAL_GUEST_SUPPORT)
|
||||
+
|
||||
|
||||
struct ConfidentialGuestSupport {
|
||||
Object parent;
|
||||
@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport {
|
||||
|
||||
typedef struct ConfidentialGuestSupportClass {
|
||||
ObjectClass parent;
|
||||
+
|
||||
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
+ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
} ConfidentialGuestSupportClass;
|
||||
|
||||
+static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ConfidentialGuestSupportClass *klass;
|
||||
+
|
||||
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
+ if (klass->kvm_init) {
|
||||
+ return klass->kvm_init(cgs, errp);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ConfidentialGuestSupportClass *klass;
|
||||
+
|
||||
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
+ if (klass->kvm_reset) {
|
||||
+ return klass->kvm_reset(cgs, errp);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#endif /* !CONFIG_USER_ONLY */
|
||||
|
||||
#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,228 @@
|
||||
From 117486e0820f135f191e19f8ebb8838a98b121c6 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 27 May 2024 11:58:51 -0400
|
||||
Subject: [PATCH 5/5] crypto/block: drop qcrypto_block_open() n_threads
|
||||
argument
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 251: block/crypto: create ciphers on demand
|
||||
RH-Jira: RHEL-36159
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/2] 68290935b174b1f2b76aa857a926da9011e54abe (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
The n_threads argument is no longer used since the previous commit.
|
||||
Remove it.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240527155851.892885-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 3ab0f063e58ed9224237d69c4211ca83335164c4)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/crypto.c | 1 -
|
||||
block/qcow.c | 2 +-
|
||||
block/qcow2.c | 5 ++---
|
||||
crypto/block-luks.c | 1 -
|
||||
crypto/block-qcow.c | 6 ++----
|
||||
crypto/block.c | 3 +--
|
||||
crypto/blockpriv.h | 1 -
|
||||
include/crypto/block.h | 2 --
|
||||
tests/unit/test-crypto-block.c | 4 ----
|
||||
9 files changed, 6 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index 21eed909c1..4eed3ffa6a 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -363,7 +363,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
|
||||
block_crypto_read_func,
|
||||
bs,
|
||||
cflags,
|
||||
- 1,
|
||||
errp);
|
||||
|
||||
if (!crypto->block) {
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index ca8e1d5ec8..c2f89db055 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -211,7 +211,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
||||
}
|
||||
s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
|
||||
- NULL, NULL, cflags, 1, errp);
|
||||
+ NULL, NULL, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 0e8b2f7518..0ebd455dc8 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -321,7 +321,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
|
||||
}
|
||||
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
||||
qcow2_crypto_hdr_read_func,
|
||||
- bs, cflags, QCOW2_MAX_THREADS, errp);
|
||||
+ bs, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1707,8 +1707,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
||||
}
|
||||
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
||||
- NULL, NULL, cflags,
|
||||
- QCOW2_MAX_THREADS, errp);
|
||||
+ NULL, NULL, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
|
||||
index 3357852c0a..5b777c15d3 100644
|
||||
--- a/crypto/block-luks.c
|
||||
+++ b/crypto/block-luks.c
|
||||
@@ -1189,7 +1189,6 @@ qcrypto_block_luks_open(QCryptoBlock *block,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
QCryptoBlockLUKS *luks = NULL;
|
||||
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
|
||||
index 02305058e3..42e9556e42 100644
|
||||
--- a/crypto/block-qcow.c
|
||||
+++ b/crypto/block-qcow.c
|
||||
@@ -44,7 +44,6 @@ qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED,
|
||||
static int
|
||||
qcrypto_block_qcow_init(QCryptoBlock *block,
|
||||
const char *keysecret,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
char *password;
|
||||
@@ -100,7 +99,6 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
|
||||
QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
|
||||
void *opaque G_GNUC_UNUSED,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) {
|
||||
@@ -115,7 +113,7 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
|
||||
return -1;
|
||||
}
|
||||
return qcrypto_block_qcow_init(block, options->u.qcow.key_secret,
|
||||
- n_threads, errp);
|
||||
+ errp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,7 +133,7 @@ qcrypto_block_qcow_create(QCryptoBlock *block,
|
||||
return -1;
|
||||
}
|
||||
/* QCow2 has no special header, since everything is hardwired */
|
||||
- return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp);
|
||||
+ return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp);
|
||||
}
|
||||
|
||||
|
||||
diff --git a/crypto/block.c b/crypto/block.c
|
||||
index ba6d1cebc7..3bcc4270c3 100644
|
||||
--- a/crypto/block.c
|
||||
+++ b/crypto/block.c
|
||||
@@ -53,7 +53,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
@@ -73,7 +72,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
block->driver = qcrypto_block_drivers[options->format];
|
||||
|
||||
if (block->driver->open(block, options, optprefix,
|
||||
- readfunc, opaque, flags, n_threads, errp) < 0)
|
||||
+ readfunc, opaque, flags, errp) < 0)
|
||||
{
|
||||
g_free(block);
|
||||
return NULL;
|
||||
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
|
||||
index 4bf6043d5d..b8f77cb5eb 100644
|
||||
--- a/crypto/blockpriv.h
|
||||
+++ b/crypto/blockpriv.h
|
||||
@@ -59,7 +59,6 @@ struct QCryptoBlockDriver {
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp);
|
||||
|
||||
int (*create)(QCryptoBlock *block,
|
||||
diff --git a/include/crypto/block.h b/include/crypto/block.h
|
||||
index 92e823c9f2..5b5d039800 100644
|
||||
--- a/include/crypto/block.h
|
||||
+++ b/include/crypto/block.h
|
||||
@@ -76,7 +76,6 @@ typedef enum {
|
||||
* @readfunc: callback for reading data from the volume
|
||||
* @opaque: data to pass to @readfunc
|
||||
* @flags: bitmask of QCryptoBlockOpenFlags values
|
||||
- * @n_threads: allow concurrent I/O from up to @n_threads threads
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
* Create a new block encryption object for an existing
|
||||
@@ -113,7 +112,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp);
|
||||
|
||||
typedef enum {
|
||||
diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c
|
||||
index 6cfc817a92..42cfab6067 100644
|
||||
--- a/tests/unit/test-crypto-block.c
|
||||
+++ b/tests/unit/test-crypto-block.c
|
||||
@@ -303,7 +303,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
0,
|
||||
- 1,
|
||||
NULL);
|
||||
g_assert(blk == NULL);
|
||||
|
||||
@@ -312,7 +311,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
QCRYPTO_BLOCK_OPEN_NO_IO,
|
||||
- 1,
|
||||
&error_abort);
|
||||
|
||||
g_assert(qcrypto_block_get_cipher(blk) == NULL);
|
||||
@@ -327,7 +325,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
0,
|
||||
- 1,
|
||||
&error_abort);
|
||||
g_assert(blk);
|
||||
|
||||
@@ -384,7 +381,6 @@ test_luks_bad_header(gconstpointer data)
|
||||
test_block_read_func,
|
||||
&buf,
|
||||
0,
|
||||
- 1,
|
||||
&err);
|
||||
g_assert(!blk);
|
||||
g_assert(err);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,153 +0,0 @@
|
||||
From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Wed, 17 May 2023 17:28:32 +0200
|
||||
Subject: [PATCH 02/21] graph-lock: Disable locking for now
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 166: block/graph-lock: Disable locking for now
|
||||
RH-Bugzilla: 2186725
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
|
||||
come from callers that hold an AioContext lock, which is not allowed
|
||||
during polling. In theory, we could temporarily release the lock, but
|
||||
callers are inconsistent about whether they hold a lock, and if they do,
|
||||
some are also confused about which one they hold. While all of this is
|
||||
fixable, it's not trivial, and the best course of action for 8.0.1 is
|
||||
probably just disabling the graph locking code temporarily.
|
||||
|
||||
We don't currently rely on graph locking yet. It is supposed to replace
|
||||
the AioContext lock eventually to enable multiqueue support, but as long
|
||||
as we still have the AioContext lock, it is sufficient without the graph
|
||||
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
|
||||
more either and this commit can be reverted. (Of course, it can also be
|
||||
reverted while the AioContext lock still exists if the callers have been
|
||||
fixed.)
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/graph-lock.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
diff --git a/block/graph-lock.c b/block/graph-lock.c
|
||||
index 259a7a0bde..2490926c90 100644
|
||||
--- a/block/graph-lock.c
|
||||
+++ b/block/graph-lock.c
|
||||
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
|
||||
/* Protects the list of aiocontext and orphaned_reader_count */
|
||||
static QemuMutex aio_context_list_lock;
|
||||
|
||||
+#if 0
|
||||
/* Written and read with atomic operations. */
|
||||
static int has_writer;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* A reader coroutine could move from an AioContext to another.
|
||||
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
|
||||
g_free(ctx->bdrv_graph);
|
||||
}
|
||||
|
||||
+#if 0
|
||||
static uint32_t reader_count(void)
|
||||
{
|
||||
BdrvGraphRWlock *brdv_graph;
|
||||
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
|
||||
assert((int32_t)rd >= 0);
|
||||
return rd;
|
||||
}
|
||||
+#endif
|
||||
|
||||
void bdrv_graph_wrlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+ /*
|
||||
+ * TODO Some callers hold an AioContext lock when this is called, which
|
||||
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
|
||||
+ * AioContext locks are gone).
|
||||
+ */
|
||||
+#if 0
|
||||
assert(!qatomic_read(&has_writer));
|
||||
|
||||
/* Make sure that constantly arriving new I/O doesn't cause starvation */
|
||||
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
|
||||
} while (reader_count() >= 1);
|
||||
|
||||
bdrv_drain_all_end();
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_wrunlock(void)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
+#if 0
|
||||
QEMU_LOCK_GUARD(&aio_context_list_lock);
|
||||
assert(qatomic_read(&has_writer));
|
||||
|
||||
@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)
|
||||
|
||||
/* Wake up all coroutine that are waiting to read the graph */
|
||||
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
{
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
|
||||
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
{
|
||||
+#if 0
|
||||
BdrvGraphRWlock *bdrv_graph;
|
||||
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
|
||||
|
||||
@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
|
||||
if (qatomic_read(&has_writer)) {
|
||||
aio_wait_kick();
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void bdrv_graph_rdlock_main_loop(void)
|
||||
@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
|
||||
void assert_bdrv_graph_readable(void)
|
||||
{
|
||||
/* reader_count() is slow due to aio_context_list_lock lock contention */
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
#ifdef CONFIG_DEBUG_GRAPH_LOCK
|
||||
assert(qemu_in_main_thread() || reader_count());
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
void assert_bdrv_graph_writable(void)
|
||||
{
|
||||
assert(qemu_in_main_thread());
|
||||
+ /* TODO Reenable when wrlock is reenabled */
|
||||
+#if 0
|
||||
assert(qatomic_read(&has_writer));
|
||||
+#endif
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,40 +0,0 @@
|
||||
From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Tue, 2 May 2023 15:51:53 +0530
|
||||
Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines
|
||||
version 7.6 and above
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm)
|
||||
|
||||
Please look at QEMU upstream commit
|
||||
1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3")
|
||||
This patch adapts the above change so that it applies to RHEL pc machines of
|
||||
version 7.6 and newer. These are the machine types that are currently supported
|
||||
in RHEL. Q35 machines are not affected.
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
---
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 4d5880e249..6c7be628e1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
|
||||
pcmc->default_nic_model = "e1000";
|
||||
pcmc->pci_root_uid = 0;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
m->default_display = "std";
|
||||
m->no_parallel = 1;
|
||||
m->numa_mem_supported = true;
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,101 +0,0 @@
|
||||
From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001
|
||||
From: Ani Sinha <anisinha@redhat.com>
|
||||
Date: Wed, 29 Mar 2023 10:27:26 +0530
|
||||
Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines
|
||||
older than version 2.3
|
||||
|
||||
RH-Author: Ani Sinha <None>
|
||||
RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3
|
||||
RH-Bugzilla: 1934134
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm)
|
||||
|
||||
i440fx machine versions 2.3 and newer support dynamic ram
|
||||
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") .
|
||||
All currently supported q35 machine types (versions 2.4 and newer) support
|
||||
resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table
|
||||
size exceeds a pre-defined value does not apply to those machine versions.
|
||||
Add a check limiting the warning message to only those machines that do not
|
||||
support expandable ram blocks (that is, i440fx machines with version 2.2
|
||||
and older).
|
||||
|
||||
Signed-off-by: Ani Sinha <anisinha@redhat.com>
|
||||
Message-Id: <20230329045726.14028-1-anisinha@redhat.com>
|
||||
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074)
|
||||
---
|
||||
hw/i386/acpi-build.c | 6 ++++--
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
include/hw/i386/pc.h | 3 +++
|
||||
4 files changed, 9 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
|
||||
index ec857a117e..9bc4d8a981 100644
|
||||
--- a/hw/i386/acpi-build.c
|
||||
+++ b/hw/i386/acpi-build.c
|
||||
@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
int legacy_table_size =
|
||||
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
|
||||
ACPI_BUILD_ALIGN_SIZE);
|
||||
- if (tables_blob->len > legacy_table_size) {
|
||||
+ if ((tables_blob->len > legacy_table_size) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
|
||||
g_array_set_size(tables_blob, legacy_table_size);
|
||||
} else {
|
||||
/* Make sure we have a buffer in case we need to resize the tables. */
|
||||
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
|
||||
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
|
||||
+ !pcmc->resizable_acpi_blob) {
|
||||
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
|
||||
warn_report("ACPI table size %u exceeds %d bytes,"
|
||||
" migration may not work",
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index f216922cee..7db5a2348f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
pcmc->pvh_enabled = true;
|
||||
pcmc->kvmclock_create_always = true;
|
||||
+ pcmc->resizable_acpi_blob = true;
|
||||
assert(!mc->get_hotplug_handler);
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index fc704d783f..4d5880e249 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
|
||||
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
|
||||
pcmc->rsdp_in_ram = false;
|
||||
+ pcmc->resizable_acpi_blob = false;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index d218ad1628..2f514d13d8 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -130,6 +130,9 @@ struct PCMachineClass {
|
||||
|
||||
/* create kvmclock device even when KVM PV features are not exposed */
|
||||
bool kvmclock_create_always;
|
||||
+
|
||||
+ /* resizable acpi blob compat */
|
||||
+ bool resizable_acpi_blob;
|
||||
};
|
||||
|
||||
#define TYPE_PC_MACHINE "generic-pc-machine"
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
|
||||
There are two NUMA-aware ARM machines: 'virt' and 'sbsa-ref'.
|
||||
Both of them are required to follow the cluster-NUMA-node boundary.
|
||||
Enable the validation to warn about the irregular configuration where
|
||||
multiple CPUs in one cluster have been associated with different NUMA
|
||||
nodes.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/sbsa-ref.c | 2 ++
|
||||
hw/arm/virt.c | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
|
||||
index 0b93558dde..efb380e7c8 100644
|
||||
--- a/hw/arm/sbsa-ref.c
|
||||
+++ b/hw/arm/sbsa-ref.c
|
||||
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
|
||||
mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
|
||||
mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
}
|
||||
|
||||
static const TypeInfo sbsa_ref_info = {
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 9be53e9355..df6a0231bc 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,166 +0,0 @@
|
||||
From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Tue, 25 Jul 2023 10:56:51 +0100
|
||||
Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
The implementation of the SMMUv3 has multiple places where it reads a
|
||||
data structure from the guest and directly operates on it without
|
||||
doing a guest-to-host endianness conversion. Since all SMMU data
|
||||
structures are little-endian, this means that the SMMU doesn't work
|
||||
on a big-endian host. In particular, this causes the Avocado test
|
||||
machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
|
||||
to fail on an s390x host.
|
||||
|
||||
Add appropriate byte-swapping on reads and writes of guest in-memory
|
||||
data structures so that the device works correctly on big-endian
|
||||
hosts.
|
||||
|
||||
As part of this we constrain queue_read() to operate only on Cmd
|
||||
structs and queue_write() on Evt structs, because in practice these
|
||||
are the only data structures the two functions are used with, and we
|
||||
need to know what the data structure is to be able to byte-swap its
|
||||
parts correctly.
|
||||
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
|
||||
Cc: qemu-stable@nongnu.org
|
||||
(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/smmu-common.c | 3 +--
|
||||
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++--------
|
||||
2 files changed, 32 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
|
||||
index e7f1c1f219..daa02ce798 100644
|
||||
--- a/hw/arm/smmu-common.c
|
||||
+++ b/hw/arm/smmu-common.c
|
||||
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
|
||||
dma_addr_t addr = baseaddr + index * sizeof(*pte);
|
||||
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
if (ret != MEMTX_OK) {
|
||||
info->type = SMMU_PTW_ERR_WALK_EABT;
|
||||
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
|
||||
index 270c80b665..cfb56725a6 100644
|
||||
--- a/hw/arm/smmuv3.c
|
||||
+++ b/hw/arm/smmuv3.c
|
||||
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
|
||||
trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
|
||||
}
|
||||
|
||||
-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
|
||||
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
|
||||
{
|
||||
dma_addr_t addr = Q_CONS_ENTRY(q);
|
||||
+ MemTxResult ret;
|
||||
+ int i;
|
||||
|
||||
- return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
|
||||
- MEMTXATTRS_UNSPECIFIED);
|
||||
+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
|
||||
+ MEMTXATTRS_UNSPECIFIED);
|
||||
+ if (ret != MEMTX_OK) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
|
||||
+ le32_to_cpus(&cmd->word[i]);
|
||||
+ }
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-static MemTxResult queue_write(SMMUQueue *q, void *data)
|
||||
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
|
||||
{
|
||||
dma_addr_t addr = Q_PROD_ENTRY(q);
|
||||
MemTxResult ret;
|
||||
+ Evt evt = *evt_in;
|
||||
+ int i;
|
||||
|
||||
- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
|
||||
+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
|
||||
+ cpu_to_le32s(&evt.word[i]);
|
||||
+ }
|
||||
+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
|
||||
MEMTXATTRS_UNSPECIFIED);
|
||||
if (ret != MEMTX_OK) {
|
||||
return ret;
|
||||
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
|
||||
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
SMMUEventInfo *event)
|
||||
{
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_ste(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
|
||||
}
|
||||
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
CD *buf, SMMUEventInfo *event)
|
||||
{
|
||||
dma_addr_t addr = STE_CTXPTR(ste);
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
trace_smmuv3_get_cd(addr);
|
||||
/* TODO: guarantee 64-bit single-copy atomicity */
|
||||
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
|
||||
event->u.f_ste_fetch.addr = addr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
|
||||
+ le32_to_cpus(&buf->word[i]);
|
||||
+ }
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
return -EINVAL;
|
||||
}
|
||||
if (s->features & SMMU_FEATURE_2LVL_STE) {
|
||||
- int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
|
||||
+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
|
||||
dma_addr_t l1ptr, l2ptr;
|
||||
STEDesc l1std;
|
||||
|
||||
@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
|
||||
event->u.f_ste_fetch.addr = l1ptr;
|
||||
return -EINVAL;
|
||||
}
|
||||
+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
|
||||
+ le32_to_cpus(&l1std.word[i]);
|
||||
+ }
|
||||
|
||||
span = L1STD_SPAN(&l1std);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,120 @@
|
||||
From 41c4083269ec772b406c6c57b496ca2011f928c7 Mon Sep 17 00:00:00 2001
|
||||
From: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Date: Tue, 9 Jul 2024 23:08:59 -0400
|
||||
Subject: [PATCH 2/2] hw/arm/virt: Avoid unexpected warning from Linux guest on
|
||||
host with Fujitsu CPUs
|
||||
|
||||
RH-Author: zhenyzha <None>
|
||||
RH-MergeRequest: 256: hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs
|
||||
RH-Jira: RHEL-39936
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [1/1] fdf156fd05b219a06e2e2ca409fff0f728c1e2cf (zhenyzha/qemu-kvm)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-39936
|
||||
|
||||
Multiple warning messages and corresponding backtraces are observed when Linux
|
||||
guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
|
||||
|
||||
[ 0.032443] ------------[ cut here ]------------
|
||||
[ 0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than
|
||||
CTR_EL0.CWG (128 < 256)
|
||||
[ 0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54
|
||||
arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032470] Modules linked in:
|
||||
[ 0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64
|
||||
[ 0.032481] Hardware name: linux,dummy-virt (DT)
|
||||
[ 0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
|
||||
[ 0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032501] sp : ffff80008003b860
|
||||
[ 0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
|
||||
[ 0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
|
||||
[ 0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
|
||||
[ 0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
|
||||
[ 0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
|
||||
[ 0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
|
||||
[ 0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
|
||||
[ 0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
|
||||
[ 0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
|
||||
[ 0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
|
||||
[ 0.032564] Call trace:
|
||||
[ 0.032566] arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032572] of_dma_configure_id+0x138/0x300
|
||||
[ 0.032591] amba_dma_configure+0x34/0xc0
|
||||
[ 0.032600] really_probe+0x78/0x3dc
|
||||
[ 0.032614] __driver_probe_device+0x108/0x160
|
||||
[ 0.032619] driver_probe_device+0x44/0x114
|
||||
[ 0.032624] __device_attach_driver+0xb8/0x14c
|
||||
[ 0.032629] bus_for_each_drv+0x88/0xe4
|
||||
[ 0.032634] __device_attach+0xb0/0x1e0
|
||||
[ 0.032638] device_initial_probe+0x18/0x20
|
||||
[ 0.032643] bus_probe_device+0xa8/0xb0
|
||||
[ 0.032648] device_add+0x4b4/0x6c0
|
||||
[ 0.032652] amba_device_try_add.part.0+0x48/0x360
|
||||
[ 0.032657] amba_device_add+0x104/0x144
|
||||
[ 0.032662] of_amba_device_create.isra.0+0x100/0x1c4
|
||||
[ 0.032666] of_platform_bus_create+0x294/0x35c
|
||||
[ 0.032669] of_platform_populate+0x5c/0x150
|
||||
[ 0.032672] of_platform_default_populate_init+0xd0/0xec
|
||||
[ 0.032697] do_one_initcall+0x4c/0x2e0
|
||||
[ 0.032701] do_initcalls+0x100/0x13c
|
||||
[ 0.032707] kernel_init_freeable+0x1c8/0x21c
|
||||
[ 0.032712] kernel_init+0x28/0x140
|
||||
[ 0.032731] ret_from_fork+0x10/0x20
|
||||
[ 0.032735] ---[ end trace 0000000000000000 ]---
|
||||
|
||||
In Linux, a check is applied to every device which is exposed through
|
||||
device-tree node. The warning message is raised when the device isn't
|
||||
DMA coherent and the cache line size is larger than ARCH_DMA_MINALIGN
|
||||
(128 bytes). The cache line is sourced from CTR_EL0[CWG], which corresponds
|
||||
to 256 bytes on the guest CPUs. The DMA coherent capability is claimed
|
||||
through 'dma-coherent' in their device-tree nodes or parent nodes.
|
||||
This happens even when the device doesn't implement or use DMA at all,
|
||||
for legacy reasons.
|
||||
|
||||
Fix the issue by adding 'dma-coherent' property to the device-tree root
|
||||
node, meaning all devices are capable of DMA coherent by default.
|
||||
This both suppresses the spurious kernel warnings and also guards
|
||||
against possible future QEMU bugs where we add a DMA-capable device
|
||||
and forget to mark it as dma-coherent.
|
||||
|
||||
Signed-off-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Reviewed-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Donald Dutile <ddutile@redhat.com>
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Message-id: 20240612020506.307793-1-zhenyzha@redhat.com
|
||||
[PMM: tweaked commit message]
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit dda533087ad5559674ff486e7031c88dc01e0abd)
|
||||
Signed-off-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 3f0496cdb9..6ece67f11d 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -330,6 +330,17 @@ static void create_fdt(VirtMachineState *vms)
|
||||
qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2);
|
||||
qemu_fdt_setprop_string(fdt, "/", "model", "linux,dummy-virt");
|
||||
|
||||
+ /*
|
||||
+ * For QEMU, all DMA is coherent. Advertising this in the root node
|
||||
+ * has two benefits:
|
||||
+ *
|
||||
+ * - It avoids potential bugs where we forget to mark a DMA
|
||||
+ * capable device as being dma-coherent
|
||||
+ * - It avoids spurious warnings from the Linux kernel about
|
||||
+ * devices which can't do DMA at all
|
||||
+ */
|
||||
+ qemu_fdt_setprop(fdt, "/", "dma-coherent", NULL, 0);
|
||||
+
|
||||
/* /chosen must exist for load_dtb to fill in necessary properties later */
|
||||
qemu_fdt_add_subnode(fdt, "/chosen");
|
||||
if (vms->dtb_randomness) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,59 @@
|
||||
From e3360c415f7de923d27c3167260a93cb679afabe Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:09:43 +0200
|
||||
Subject: [PATCH 1/2] hw/arm/virt: Fix spurious call to arm_virt_compat_set()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 238: hw/arm/virt: Fix spurious call to arm_virt_compat_set()
|
||||
RH-Jira: RHEL-34945
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Commit: [1/1] a858a3e1dff12b28e14f7e4bd2b896a9f06eacbb (eauger1/centos-qemu-kvm)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-34945
|
||||
Status: RHEL-only
|
||||
|
||||
Downstream, we apply arm_rhel_compat in place of arm_virt_compat.
|
||||
This is done through arm_rhel_compat_set() transparently called in
|
||||
DEFINE_RHEL_MACHINE_LATEST(). So there is no need to call
|
||||
arm_virt_compat_set() in rhel_machine_class_init(). Besides,
|
||||
this triggers a "GLib: g_ptr_array_add: assertion 'rarray' failed"
|
||||
warning.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index f1af9495c6..3f0496cdb9 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -85,6 +85,7 @@
|
||||
#include "hw/char/pl011.h"
|
||||
#include "qemu/guest-random.h"
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static GlobalProperty arm_virt_compat[] = {
|
||||
{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
|
||||
};
|
||||
@@ -101,7 +102,6 @@ static void arm_virt_compat_set(MachineClass *mc)
|
||||
arm_virt_compat_len);
|
||||
}
|
||||
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
|
||||
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
@@ -3536,7 +3536,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
MachineClass *mc = MACHINE_CLASS(oc);
|
||||
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
|
||||
- arm_virt_compat_set(mc);
|
||||
|
||||
mc->family = "virt-rhel-Z";
|
||||
mc->init = machvirt_init;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 27 Jun 2023 20:20:09 +1000
|
||||
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
|
||||
RHEL machines
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
|
||||
RH-Bugzilla: 2171363
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
|
||||
Upstream Status: RHEL only
|
||||
|
||||
Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
|
||||
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
|
||||
A warning message will be printed if the boundary is broken.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index df6a0231bc..faf68488d5 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->smp_props.clusters_supported = true;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
+ /* platform instead of architectural choice */
|
||||
+ mc->cpu_cluster_has_numa_boundary = true;
|
||||
mc->default_ram_id = "mach-virt.ram";
|
||||
|
||||
object_class_property_add(oc, "acpi", "OnOffAuto",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,73 @@
|
||||
From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:40 -0500
|
||||
Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using
|
||||
guest_memfd
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
When guest_memfd is enabled, the BIOS is generally part of the initial
|
||||
encrypted guest image and will be accessed as private guest memory. Add
|
||||
the necessary changes to set up the associated RAM region with a
|
||||
guest_memfd backend to allow for this.
|
||||
|
||||
Current support centers around using -bios to load the BIOS data.
|
||||
Support for loading the BIOS via pflash requires additional enablement
|
||||
since those interfaces rely on the use of ROM memory regions which make
|
||||
use of the KVM_MEM_READONLY memslot flag, which is not supported for
|
||||
guest_memfd-backed memslots.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86-common.c | 17 ++++++++++++-----
|
||||
1 file changed, 12 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
|
||||
index 35fe6eabea..6cbb76c25c 100644
|
||||
--- a/hw/i386/x86-common.c
|
||||
+++ b/hw/i386/x86-common.c
|
||||
@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
|
||||
- &error_fatal);
|
||||
+ if (machine_require_guest_memfd(MACHINE(x86ms))) {
|
||||
+ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
|
||||
+ bios_size, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
|
||||
+ bios_size, &error_fatal);
|
||||
+ }
|
||||
if (sev_enabled()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
}
|
||||
g_free(filename);
|
||||
|
||||
- /* map the last 128KB of the BIOS in ISA space */
|
||||
- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
- !isapc_ram_fw);
|
||||
+ if (!machine_require_guest_memfd(MACHINE(x86ms))) {
|
||||
+ /* map the last 128KB of the BIOS in ISA space */
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
+ !isapc_ram_fw);
|
||||
+ }
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,106 @@
|
||||
From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Tue, 30 Apr 2024 17:06:39 +0200
|
||||
Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take
|
||||
X86MachineState rather than MachineState
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The function creates and leaks two MemoryRegion objects regarding the BIOS which
|
||||
will be moved into X86MachineState in the next steps to avoid the leakage.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240430150643.111976-3-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/microvm.c | 2 +-
|
||||
hw/i386/pc_sysfw.c | 4 ++--
|
||||
hw/i386/x86.c | 4 ++--
|
||||
include/hw/i386/x86.h | 2 +-
|
||||
4 files changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
|
||||
index 61a772dfe6..fec63cacfa 100644
|
||||
--- a/hw/i386/microvm.c
|
||||
+++ b/hw/i386/microvm.c
|
||||
@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
|
||||
default_firmware = x86_machine_is_acpi_enabled(x86ms)
|
||||
? MICROVM_BIOS_FILENAME
|
||||
: MICROVM_QBOOT_FILENAME;
|
||||
- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true);
|
||||
+ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true);
|
||||
}
|
||||
|
||||
static void microvm_memory_init(MicrovmMachineState *mms)
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 3efabbbab2..ef7dea9798 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
|
||||
|
||||
if (!pcmc->pci_enabled) {
|
||||
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true);
|
||||
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
|
||||
if (!pflash_blk[0]) {
|
||||
/* Machine property pflash0 not set, use ROM mode */
|
||||
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false);
|
||||
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
|
||||
} else {
|
||||
if (kvm_enabled() && !kvm_readonly_mem_enabled()) {
|
||||
/*
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 2a4f3ee285..6d3c72f124 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
nb_option_roms++;
|
||||
}
|
||||
|
||||
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw)
|
||||
{
|
||||
const char *bios_name;
|
||||
@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
ssize_t ret;
|
||||
|
||||
/* BIOS load */
|
||||
- bios_name = ms->firmware ?: default_firmware;
|
||||
+ bios_name = MACHINE(x86ms)->firmware ?: default_firmware;
|
||||
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
|
||||
if (filename) {
|
||||
bios_size = get_image_size(filename);
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index 4dc30dcb4d..cb07618d19 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
|
||||
DeviceState *dev, Error **errp);
|
||||
|
||||
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw);
|
||||
|
||||
void x86_load_linux(X86MachineState *x86ms,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,51 @@
|
||||
From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 3 Apr 2024 10:59:53 -0400
|
||||
Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is
|
||||
not disabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system
|
||||
also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC
|
||||
is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to
|
||||
be cleared. The PIC probe should then print:
|
||||
|
||||
[ 0.155970] Using NULL legacy PIC
|
||||
|
||||
However, no such log is printed in the guest kernel unless PCAT_COMPAT is
|
||||
cleared.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/acpi-common.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c
|
||||
index 20f19269da..0cc2919bb8 100644
|
||||
--- a/hw/i386/acpi-common.c
|
||||
+++ b/hw/i386/acpi-common.c
|
||||
@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
acpi_table_begin(&table, table_data);
|
||||
/* Local APIC Address */
|
||||
build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4);
|
||||
- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */
|
||||
+ /* Flags. bit 0: PCAT_COMPAT */
|
||||
+ build_append_int_noprefix(table_data,
|
||||
+ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4);
|
||||
|
||||
for (i = 0; i < apic_ids->len; i++) {
|
||||
pc_madt_cpu_entry(i, apic_ids, table_data, false);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,164 @@
|
||||
From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:07 +0200
|
||||
Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios
|
||||
region
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm)
|
||||
|
||||
In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped
|
||||
to the top of the 4G memory boundary. Do the same in the -pflash case, but only
|
||||
for new machine versions for migration compatibility. This establishes common
|
||||
behavior and makes pflash commands work in the "isa-bios" region which some
|
||||
real-world legacy bioses rely on.
|
||||
|
||||
Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash
|
||||
case will now also point to encrypted memory, just like it already does in the
|
||||
-bios case.
|
||||
|
||||
When running `info mtree` before and after this commit with
|
||||
`qemu-system-x86_64 -S -drive \
|
||||
if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running
|
||||
`diff -u before.mtree after.mtree` results in the following changes in the
|
||||
memory tree:
|
||||
|
||||
| --- before.mtree
|
||||
| +++ after.mtree
|
||||
| @@ -71,7 +71,7 @@
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
|
||||
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
|
||||
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
|
||||
| @@ -108,7 +108,7 @@
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
|
||||
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
|
||||
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
|
||||
| @@ -131,11 +131,14 @@
|
||||
| memory-region: pc.ram
|
||||
| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram
|
||||
|
|
||||
| +memory-region: system.flash0
|
||||
| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
|
||||
| +
|
||||
| memory-region: pci
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
|
|
||||
| memory-region: smram
|
||||
| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff
|
||||
|
||||
Note that in both cases the "system" memory region contains the entry
|
||||
|
||||
00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
|
||||
|
||||
but the "system.flash0" memory region only appears standalone when "isa-bios" is
|
||||
an alias.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-7-shentey@gmail.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
hw/i386/pc_q35.c | 1 +
|
||||
hw/i386/pc_sysfw.c | 8 +++++++-
|
||||
include/hw/i386/pc.h | 1 +
|
||||
5 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 1a34bc4522..660a59c63b 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->has_reserved_memory = true;
|
||||
pcmc->enforce_aligned_dimm = true;
|
||||
pcmc->enforce_amd_1tb_hole = true;
|
||||
+ pcmc->isa_bios_alias = true;
|
||||
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
|
||||
* to be used at the moment, 32K should be enough for a while. */
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index bef3e8b73e..dbb7f2ed17 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->alias = "pc";
|
||||
m->is_default = 1;
|
||||
m->smp_props.prefer_sockets = true;
|
||||
+ pcmc->isa_bios_alias = false;
|
||||
}
|
||||
|
||||
static void pc_init_rhel760(MachineState *machine)
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index dedc86eec9..f9900ad798 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
+ pcmc->isa_bios_alias = false;
|
||||
|
||||
compat_props_add(m->compat_props, pc_rhel_9_5_compat,
|
||||
pc_rhel_9_5_compat_len);
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 82d37cb376..ac88ad4eb9 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
MemoryRegion *rom_memory)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(pcms);
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
|
||||
hwaddr total_size = 0;
|
||||
int i;
|
||||
BlockBackend *blk;
|
||||
@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ if (pcmc->isa_bios_alias) {
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
|
||||
+ true);
|
||||
+ } else {
|
||||
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ }
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 467e7fb52f..3f53ec73ac 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -122,6 +122,7 @@ struct PCMachineClass {
|
||||
bool enforce_aligned_dimm;
|
||||
bool broken_reserved_end;
|
||||
bool enforce_amd_1tb_hole;
|
||||
+ bool isa_bios_alias;
|
||||
|
||||
/* generate legacy CPU hotplug AML */
|
||||
bool legacy_cpu_hotplug;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,53 @@
|
||||
From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Mon, 22 Apr 2024 22:06:22 +0200
|
||||
Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from
|
||||
pc_isa_bios_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240422200625.2768-2-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index ef7dea9798..59c7a81692 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -41,8 +41,7 @@
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
- MemoryRegion *flash_mem,
|
||||
- int ram_size)
|
||||
+ MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
MemoryRegion *isa_bios;
|
||||
@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(rom_memory, flash_mem, size);
|
||||
+ pc_isa_bios_init(rom_memory, flash_mem);
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,158 @@
|
||||
From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:30 -0500
|
||||
Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from
|
||||
OVMF header
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
A recent version of OVMF expanded the reset vector GUID list to add
|
||||
SEV-specific metadata GUID. The SEV metadata describes the reserved
|
||||
memory regions such as the secrets and CPUID page used during the SEV-SNP
|
||||
guest launch.
|
||||
|
||||
The pc_system_get_ovmf_sev_metadata_ptr() is used to retrieve the SEV
|
||||
metadata pointer from the OVMF GUID list.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 4 ++++
|
||||
include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++
|
||||
target/i386/sev-sysemu-stub.c | 4 ++++
|
||||
target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++
|
||||
target/i386/sev.h | 2 ++
|
||||
5 files changed, 68 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index ac88ad4eb9..9b8671c441 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
pc_system_parse_ovmf_flash(ptr, size);
|
||||
|
||||
if (sev_enabled()) {
|
||||
+
|
||||
+ /* Copy the SEV metadata table (if it exists) */
|
||||
+ pc_system_parse_sev_metadata(ptr, size);
|
||||
+
|
||||
ret = sev_es_save_reset_vector(ptr, size);
|
||||
if (ret) {
|
||||
error_report("failed to locate and/or save reset vector");
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 3f53ec73ac..94b49310f5 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
|
||||
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
|
||||
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
|
||||
|
||||
+typedef enum {
|
||||
+ SEV_DESC_TYPE_UNDEF,
|
||||
+ /* The section contains the region that must be validated by the VMM. */
|
||||
+ SEV_DESC_TYPE_SNP_SEC_MEM,
|
||||
+ /* The section contains the SNP secrets page */
|
||||
+ SEV_DESC_TYPE_SNP_SECRETS,
|
||||
+ /* The section contains address that can be used as a CPUID page */
|
||||
+ SEV_DESC_TYPE_CPUID,
|
||||
+
|
||||
+} ovmf_sev_metadata_desc_type;
|
||||
+
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc {
|
||||
+ uint32_t base;
|
||||
+ uint32_t len;
|
||||
+ ovmf_sev_metadata_desc_type type;
|
||||
+} OvmfSevMetadataDesc;
|
||||
+
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadata {
|
||||
+ uint8_t signature[4];
|
||||
+ uint32_t len;
|
||||
+ uint32_t version;
|
||||
+ uint32_t num_desc;
|
||||
+ OvmfSevMetadataDesc descs[];
|
||||
+} OvmfSevMetadata;
|
||||
+
|
||||
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
|
||||
|
||||
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
|
||||
MemoryRegion *pci_address_space);
|
||||
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
|
||||
index 96e1c15cc3..fc1c57c411 100644
|
||||
--- a/target/i386/sev-sysemu-stub.c
|
||||
+++ b/target/i386/sev-sysemu-stub.c
|
||||
@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
|
||||
{
|
||||
monitor_printf(mon, "SEV is not available in this QEMU\n");
|
||||
}
|
||||
+
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
|
||||
+{
|
||||
+}
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index e84e4395a5..17281bb2c7 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp)
|
||||
return sev_get_capabilities(errp);
|
||||
}
|
||||
|
||||
+static OvmfSevMetadata *ovmf_sev_metadata_table;
|
||||
+
|
||||
+#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc"
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset {
|
||||
+ uint32_t offset;
|
||||
+} OvmfSevMetadataOffset;
|
||||
+
|
||||
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void)
|
||||
+{
|
||||
+ return ovmf_sev_metadata_table;
|
||||
+}
|
||||
+
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
|
||||
+{
|
||||
+ OvmfSevMetadata *metadata;
|
||||
+ OvmfSevMetadataOffset *data;
|
||||
+
|
||||
+ if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data,
|
||||
+ NULL)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset);
|
||||
+ if (memcmp(metadata->signature, "ASEV", 4) != 0 ||
|
||||
+ metadata->len < sizeof(OvmfSevMetadata) ||
|
||||
+ metadata->len > flash_size - data->offset) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len);
|
||||
+}
|
||||
+
|
||||
static SevAttestationReport *sev_get_attestation_report(const char *mnonce,
|
||||
Error **errp)
|
||||
{
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index 5dc4767b1e..cc12824dd6 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
|
||||
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
|
||||
void sev_es_set_reset_vector(CPUState *cpu);
|
||||
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size);
|
||||
+
|
||||
#endif
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,165 @@
|
||||
From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:36 -0500
|
||||
Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP
|
||||
is enabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm)
|
||||
|
||||
As with SEV, an SNP guest requires that the BIOS be part of the initial
|
||||
encrypted/measured guest payload. Extend sev_encrypt_flash() to handle
|
||||
the SNP case and plumb through the GPA of the BIOS location since this
|
||||
is needed for SNP.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 12 +++++++-----
|
||||
hw/i386/x86-common.c | 2 +-
|
||||
include/hw/i386/x86.h | 2 +-
|
||||
target/i386/sev-sysemu-stub.c | 2 +-
|
||||
target/i386/sev.c | 5 +++--
|
||||
target/i386/sev.h | 2 +-
|
||||
6 files changed, 14 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 9b8671c441..7cdbafc8d2 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
|
||||
+ hwaddr gpa;
|
||||
+
|
||||
system_flash = pcms->flash[i];
|
||||
blk = pflash_cfi01_get_blk(system_flash);
|
||||
if (!blk) {
|
||||
@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
}
|
||||
|
||||
total_size += size;
|
||||
+ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */
|
||||
qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks",
|
||||
size / FLASH_SECTOR_SIZE);
|
||||
sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal);
|
||||
- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0,
|
||||
- 0x100000000ULL - total_size);
|
||||
+ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa);
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
if (sev_enabled()) {
|
||||
flash_ptr = memory_region_get_ram_ptr(flash_mem);
|
||||
flash_size = memory_region_size(flash_mem);
|
||||
- x86_firmware_configure(flash_ptr, flash_size);
|
||||
+ x86_firmware_configure(gpa, flash_ptr, flash_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
pc_system_flash_cleanup_unused(pcms);
|
||||
}
|
||||
|
||||
-void x86_firmware_configure(void *ptr, int size)
|
||||
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
- sev_encrypt_flash(ptr, size, &error_fatal);
|
||||
+ sev_encrypt_flash(gpa, ptr, size, &error_fatal);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
|
||||
index 67b03c913a..35fe6eabea 100644
|
||||
--- a/hw/i386/x86-common.c
|
||||
+++ b/hw/i386/x86-common.c
|
||||
@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
*/
|
||||
void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
- x86_firmware_configure(ptr, bios_size);
|
||||
+ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size);
|
||||
} else {
|
||||
memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index b006f16b8d..d43cb3908e 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent);
|
||||
DeviceState *ioapic_init_secondary(GSIState *gsi_state);
|
||||
|
||||
/* pc_sysfw.c */
|
||||
-void x86_firmware_configure(void *ptr, int size);
|
||||
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size);
|
||||
|
||||
#endif
|
||||
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
|
||||
index fc1c57c411..d5bf886e79 100644
|
||||
--- a/target/i386/sev-sysemu-stub.c
|
||||
+++ b/target/i386/sev-sysemu-stub.c
|
||||
@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret,
|
||||
error_setg(errp, "SEV is not available in this QEMU");
|
||||
}
|
||||
|
||||
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
g_assert_not_reached();
|
||||
}
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 06401f0526..7b5c4b4874 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1484,7 +1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
int
|
||||
-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
+sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
|
||||
|
||||
@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
/* zero the excess data so the measurement can be reliably calculated */
|
||||
memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
|
||||
|
||||
- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
|
||||
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
+ sizeof(*padded_ht), errp) < 0) {
|
||||
ret = false;
|
||||
}
|
||||
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index cc12824dd6..858005a119 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void);
|
||||
uint32_t sev_get_reduced_phys_bits(void);
|
||||
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
|
||||
|
||||
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
|
||||
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
|
||||
int sev_inject_launch_secret(const char *hdr, const char *secret,
|
||||
uint64_t gpa, Error **errp);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,123 @@
|
||||
From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:39 -0500
|
||||
Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Current SNP guest kernels will attempt to access these regions with
|
||||
C-bit set, so guest_memfd is needed to handle that. Otherwise,
|
||||
kvm_convert_memory() will fail when the guest kernel tries to access it
|
||||
and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
|
||||
to private.
|
||||
|
||||
Whether guests should actually try to access ROM regions in this way (or
|
||||
need to deal with legacy ROM regions at all), is a separate issue to be
|
||||
addressed on kernel side, but current SNP guest kernels will exhibit
|
||||
this behavior and so this handling is needed to allow QEMU to continue
|
||||
running existing SNP guest kernels.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
[pankaj: Added sev_snp_enabled() check]
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 14 ++++++++++----
|
||||
hw/i386/pc_sysfw.c | 19 +++++++++++++------
|
||||
2 files changed, 23 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 0aca0cc79e..b25d075b59 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -62,6 +62,7 @@
|
||||
#include "hw/mem/memory-device.h"
|
||||
#include "e820_memory_layout.h"
|
||||
#include "trace.h"
|
||||
+#include "sev.h"
|
||||
#include CONFIG_DEVICES
|
||||
|
||||
#ifdef CONFIG_XEN_EMU
|
||||
@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms,
|
||||
pc_system_firmware_init(pcms, rom_memory);
|
||||
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
- &error_fatal);
|
||||
- if (pcmc->pci_enabled) {
|
||||
- memory_region_set_readonly(option_rom_mr, true);
|
||||
+ if (machine_require_guest_memfd(machine)) {
|
||||
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
|
||||
+ PC_ROM_SIZE, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
+ &error_fatal);
|
||||
+ if (pcmc->pci_enabled) {
|
||||
+ memory_region_set_readonly(option_rom_mr, true);
|
||||
+ }
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 7cdbafc8d2..ef80281d28 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -40,8 +40,8 @@
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
- MemoryRegion *flash_mem)
|
||||
+static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios,
|
||||
+ MemoryRegion *rom_memory, MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
uint64_t flash_size;
|
||||
@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(flash_size, 128 * KiB);
|
||||
- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
- &error_fatal);
|
||||
+ if (machine_require_guest_memfd(MACHINE(pcms))) {
|
||||
+ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
|
||||
+ isa_bios_size, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
+ &error_fatal);
|
||||
+ }
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
|
||||
isa_bios_size);
|
||||
|
||||
- memory_region_set_readonly(isa_bios, true);
|
||||
+ if (!machine_require_guest_memfd(current_machine)) {
|
||||
+ memory_region_set_readonly(isa_bios, true);
|
||||
+ }
|
||||
}
|
||||
|
||||
static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
|
||||
@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
|
||||
true);
|
||||
} else {
|
||||
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem);
|
||||
}
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,58 @@
|
||||
From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Apr 2024 18:07:43 -0500
|
||||
Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older
|
||||
machine types
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for
|
||||
creating SEV/SEV-ES going forward. However, this API results in guest
|
||||
measurement changes which are generally not expected for users of these
|
||||
older guest types and can cause disruption if they switch to a newer
|
||||
QEMU/kernel version. Avoid this by continuing to use the older
|
||||
KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Message-ID: <20240409230743.962513-4-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 1 +
|
||||
target/i386/sev.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index b9fde3cec1..1a34bc4522 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
GlobalProperty pc_rhel_9_5_compat[] = {
|
||||
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
|
||||
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
|
||||
+ { "sev-guest", "legacy-vm-type", "true" },
|
||||
};
|
||||
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index f4ee317cb0..d30b68c11e 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj)
|
||||
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
|
||||
&sev->reduced_phys_bits,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
+ object_apply_compat_props(obj);
|
||||
}
|
||||
|
||||
/* sev guest info */
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,133 @@
|
||||
From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:04 +0200
|
||||
Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an
|
||||
"isa_bios" attribute to X86MachineState.
|
||||
|
||||
Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-4-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 7 +++----
|
||||
hw/i386/x86.c | 9 ++++-----
|
||||
include/hw/i386/x86.h | 7 +++++++
|
||||
3 files changed, 14 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 59c7a81692..82d37cb376 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -40,11 +40,10 @@
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
-static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
+static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
- MemoryRegion *isa_bios;
|
||||
uint64_t flash_size;
|
||||
void *flash_ptr, *isa_bios_ptr;
|
||||
|
||||
@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(flash_size, 128 * KiB);
|
||||
- isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
&error_fatal);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms)
|
||||
static void pc_system_flash_map(PCMachineState *pcms,
|
||||
MemoryRegion *rom_memory)
|
||||
{
|
||||
+ X86MachineState *x86ms = X86_MACHINE(pcms);
|
||||
hwaddr total_size = 0;
|
||||
int i;
|
||||
BlockBackend *blk;
|
||||
@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(rom_memory, flash_mem);
|
||||
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 6d3c72f124..457e8a34a5 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- MemoryRegion *bios, *isa_bios;
|
||||
+ MemoryRegion *bios;
|
||||
int bios_size, isa_bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
- isa_bios,
|
||||
+ &x86ms->isa_bios,
|
||||
1);
|
||||
- memory_region_set_readonly(isa_bios, !isapc_ram_fw);
|
||||
+ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index cb07618d19..a07de79167 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -18,6 +18,7 @@
|
||||
#define HW_I386_X86_H
|
||||
|
||||
#include "exec/hwaddr.h"
|
||||
+#include "exec/memory.h"
|
||||
|
||||
#include "hw/boards.h"
|
||||
#include "hw/intc/ioapic.h"
|
||||
@@ -52,6 +53,12 @@ struct X86MachineState {
|
||||
GMappedFile *initrd_mapped_file;
|
||||
HotplugHandler *acpi_dev;
|
||||
|
||||
+ /*
|
||||
+ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
|
||||
+ * boundary.
|
||||
+ */
|
||||
+ MemoryRegion isa_bios;
|
||||
+
|
||||
/* RAM information (sizes, addresses, configuration): */
|
||||
ram_addr_t below_4g_mem_size, above_4g_mem_size;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,105 @@
|
||||
From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:05 +0200
|
||||
Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to
|
||||
X86MachineState. Note that it is only used in the -bios case.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-5-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 13 ++++++-------
|
||||
include/hw/i386/x86.h | 6 ++++++
|
||||
2 files changed, 12 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 457e8a34a5..29167de97d 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- MemoryRegion *bios;
|
||||
int bios_size, isa_bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
- bios = g_malloc(sizeof(*bios));
|
||||
- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
|
||||
+ &error_fatal);
|
||||
if (sev_enabled()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
* the firmware as rom to properly re-initialize on reset.
|
||||
* Just go for a straight file load instead.
|
||||
*/
|
||||
- void *ptr = memory_region_get_ram_ptr(bios);
|
||||
+ void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
x86_firmware_configure(ptr, bios_size);
|
||||
} else {
|
||||
- memory_region_set_readonly(bios, !isapc_ram_fw);
|
||||
+ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
if (ret != 0) {
|
||||
goto bios_error;
|
||||
@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
|
||||
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
(uint32_t)(-bios_size),
|
||||
- bios);
|
||||
+ &x86ms->bios);
|
||||
return;
|
||||
|
||||
bios_error:
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index a07de79167..55c6809ae0 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -53,6 +53,12 @@ struct X86MachineState {
|
||||
GMappedFile *initrd_mapped_file;
|
||||
HotplugHandler *acpi_dev;
|
||||
|
||||
+ /*
|
||||
+ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used
|
||||
+ * in the ROM (-bios) case.
|
||||
+ */
|
||||
+ MemoryRegion bios;
|
||||
+
|
||||
/*
|
||||
* Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
|
||||
* boundary.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,69 @@
|
||||
From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Tue, 30 Apr 2024 17:06:38 +0200
|
||||
Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in
|
||||
x86_bios_rom_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Given that memory_region_set_readonly() is a no-op when the readonlyness is
|
||||
already as requested it is possible to simplify the pattern
|
||||
|
||||
if (condition) {
|
||||
foo(true);
|
||||
}
|
||||
|
||||
to
|
||||
|
||||
foo(condition);
|
||||
|
||||
which is shorter and makes the invariant of the code easier to see.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240430150643.111976-2-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 8 ++------
|
||||
1 file changed, 2 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 3d5b51e92d..2a4f3ee285 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
x86_firmware_configure(ptr, bios_size);
|
||||
} else {
|
||||
- if (!isapc_ram_fw) {
|
||||
- memory_region_set_readonly(bios, true);
|
||||
- }
|
||||
+ memory_region_set_readonly(bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
if (ret != 0) {
|
||||
goto bios_error;
|
||||
@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
- if (!isapc_ram_fw) {
|
||||
- memory_region_set_readonly(isa_bios, true);
|
||||
- }
|
||||
+ memory_region_set_readonly(isa_bios, !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,98 @@
|
||||
From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:06 +0200
|
||||
Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from
|
||||
x86_bios_rom_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The function is inspired by pc_isa_bios_init() and should eventually replace it.
|
||||
Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands
|
||||
to work in the isa-bios region.
|
||||
|
||||
While at it convert the magic number 0x100000 (== 1MiB) to increase readability.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-6-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 25 ++++++++++++++++---------
|
||||
include/hw/i386/x86.h | 2 ++
|
||||
2 files changed, 18 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 29167de97d..c61f4ebfa6 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
nb_option_roms++;
|
||||
}
|
||||
|
||||
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
|
||||
+ MemoryRegion *bios, bool read_only)
|
||||
+{
|
||||
+ uint64_t bios_size = memory_region_size(bios);
|
||||
+ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
+
|
||||
+ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
+ bios_size - isa_bios_size, isa_bios_size);
|
||||
+ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size,
|
||||
+ isa_bios, 1);
|
||||
+ memory_region_set_readonly(isa_bios, read_only);
|
||||
+}
|
||||
+
|
||||
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw)
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- int bios_size, isa_bios_size;
|
||||
+ int bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
/* BIOS load */
|
||||
@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
g_free(filename);
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
- isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
|
||||
- bios_size - isa_bios_size, isa_bios_size);
|
||||
- memory_region_add_subregion_overlap(rom_memory,
|
||||
- 0x100000 - isa_bios_size,
|
||||
- &x86ms->isa_bios,
|
||||
- 1);
|
||||
- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
+ !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index 55c6809ae0..d7b7d3f3ce 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
|
||||
DeviceState *dev, Error **errp);
|
||||
|
||||
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
|
||||
+ MemoryRegion *bios, bool read_only);
|
||||
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 25 Jul 2023 15:34:45 -0300
|
||||
Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
|
||||
<= pc-q35-rhel9.2.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
|
||||
RH-Bugzilla: 2223691
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
This is a downstream-only patch that sets off the property
|
||||
x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing
|
||||
live migrations to RHEL9.2 happen successfully.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691
|
||||
Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0")
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 5ea52317b9..6f5117669d 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = {
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
/* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,118 +0,0 @@
|
||||
From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 2 May 2023 21:27:02 -0300
|
||||
Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine
|
||||
type < 8.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
|
||||
RH-Bugzilla: 2189423
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm)
|
||||
|
||||
Since its implementation in v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
|
||||
set for machine types < 8.0 will cause migration to fail if the target
|
||||
QEMU version is < 8.0.0 :
|
||||
|
||||
qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
|
||||
qemu-system-x86_64: Failed to load PCIDevice:config
|
||||
qemu-system-x86_64: Failed to load e1000e:parent_obj
|
||||
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
|
||||
qemu-system-x86_64: load of migration failed: Invalid argument
|
||||
|
||||
The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
|
||||
with this cmdline:
|
||||
|
||||
./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
|
||||
|
||||
In order to fix this, property x-pcie-err-unc-mask was introduced to
|
||||
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
|
||||
default, but is disabled if machine type <= 7.2.
|
||||
|
||||
Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
|
||||
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
|
||||
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f)
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 1 +
|
||||
hw/pci/pci.c | 2 ++
|
||||
hw/pci/pcie_aer.c | 11 +++++++----
|
||||
include/hw/pci/pci.h | 2 ++
|
||||
4 files changed, 12 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 0e0120b7f2..c28702b690 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
|
||||
{ "e1000e", "migrate-timadj", "off" },
|
||||
{ "virtio-mem", "x-early-migration", "false" },
|
||||
{ "migration", "x-preempt-pre-7-2", "true" },
|
||||
+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
|
||||
};
|
||||
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
|
||||
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
|
||||
index def5000e7b..8ad4349e96 100644
|
||||
--- a/hw/pci/pci.c
|
||||
+++ b/hw/pci/pci.c
|
||||
@@ -79,6 +79,8 @@ static Property pci_props[] = {
|
||||
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
|
||||
failover_pair_id),
|
||||
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
|
||||
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
|
||||
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
|
||||
DEFINE_PROP_END_OF_LIST()
|
||||
};
|
||||
|
||||
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
|
||||
index 103667c368..374d593ead 100644
|
||||
--- a/hw/pci/pcie_aer.c
|
||||
+++ b/hw/pci/pcie_aer.c
|
||||
@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
|
||||
|
||||
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
|
||||
PCI_ERR_UNC_SUPPORTED);
|
||||
- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_MASK_DEFAULT);
|
||||
- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
- PCI_ERR_UNC_SUPPORTED);
|
||||
+
|
||||
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
|
||||
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_MASK_DEFAULT);
|
||||
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
|
||||
+ PCI_ERR_UNC_SUPPORTED);
|
||||
+ }
|
||||
|
||||
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
|
||||
PCI_ERR_UNC_SEVERITY_DEFAULT);
|
||||
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
|
||||
index d5a40cd058..6dc6742fc4 100644
|
||||
--- a/include/hw/pci/pci.h
|
||||
+++ b/include/hw/pci/pci.h
|
||||
@@ -207,6 +207,8 @@ enum {
|
||||
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
|
||||
#define QEMU_PCIE_CXL_BITNR 10
|
||||
QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
|
||||
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
|
||||
+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
|
||||
};
|
||||
|
||||
typedef struct PCIINTxRoute {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,470 +0,0 @@
|
||||
From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with
|
||||
qemu_bh_new_guarded
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:09 2023 -0400
|
||||
|
||||
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
|
||||
|
||||
This protects devices from bh->mmio reentrancy issues.
|
||||
|
||||
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Paul Durrant <paul@xen.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/9pfs/xen-9p-backend.c | 5 ++++-
|
||||
hw/block/dataplane/virtio-blk.c | 3 ++-
|
||||
hw/block/dataplane/xen-block.c | 5 +++--
|
||||
hw/char/virtio-serial-bus.c | 3 ++-
|
||||
hw/display/qxl.c | 9 ++++++---
|
||||
hw/display/virtio-gpu.c | 6 ++++--
|
||||
hw/ide/ahci.c | 3 ++-
|
||||
hw/ide/ahci_internal.h | 1 +
|
||||
hw/ide/core.c | 4 +++-
|
||||
hw/misc/imx_rngc.c | 6 ++++--
|
||||
hw/misc/macio/mac_dbdma.c | 2 +-
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/nvme/ctrl.c | 6 ++++--
|
||||
hw/scsi/mptsas.c | 3 ++-
|
||||
hw/scsi/scsi-bus.c | 3 ++-
|
||||
hw/scsi/vmw_pvscsi.c | 3 ++-
|
||||
hw/usb/dev-uas.c | 3 ++-
|
||||
hw/usb/hcd-dwc2.c | 3 ++-
|
||||
hw/usb/hcd-ehci.c | 3 ++-
|
||||
hw/usb/hcd-uhci.c | 2 +-
|
||||
hw/usb/host-libusb.c | 6 ++++--
|
||||
hw/usb/redirect.c | 6 ++++--
|
||||
hw/usb/xen-usb.c | 3 ++-
|
||||
hw/virtio/virtio-balloon.c | 5 +++--
|
||||
hw/virtio/virtio-crypto.c | 3 ++-
|
||||
25 files changed, 66 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
|
||||
index 74f3a05f88..0e266c552b 100644
|
||||
--- a/hw/9pfs/xen-9p-backend.c
|
||||
+++ b/hw/9pfs/xen-9p-backend.c
|
||||
@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev {
|
||||
|
||||
int num_rings;
|
||||
Xen9pfsRing *rings;
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
} Xen9pfsDev;
|
||||
|
||||
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
|
||||
@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
|
||||
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
|
||||
XEN_FLEX_RING_SIZE(ring_order);
|
||||
|
||||
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
|
||||
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
|
||||
+ &xen_9pdev->rings[i],
|
||||
+ &xen_9pdev->mem_reentrancy_guard);
|
||||
xen_9pdev->rings[i].out_cons = 0;
|
||||
xen_9pdev->rings[i].out_size = 0;
|
||||
xen_9pdev->rings[i].inprogress = false;
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index b28d81737e..a6202997ee 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
||||
} else {
|
||||
s->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
||||
|
||||
*dataplane = s;
|
||||
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
|
||||
index 734da42ea7..d8bc39d359 100644
|
||||
--- a/hw/block/dataplane/xen-block.c
|
||||
+++ b/hw/block/dataplane/xen-block.c
|
||||
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
|
||||
} else {
|
||||
dataplane->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
|
||||
- dataplane);
|
||||
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
|
||||
+ dataplane,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
|
||||
return dataplane;
|
||||
}
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index 7d4601cb5d..dd619f0731 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
|
||||
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
|
||||
index 80ce1e9a93..f1c0eb7dfc 100644
|
||||
--- a/hw/display/qxl.c
|
||||
+++ b/hw/display/qxl.c
|
||||
@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
|
||||
|
||||
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
|
||||
|
||||
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
|
||||
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
qxl_reset_state(qxl);
|
||||
|
||||
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
|
||||
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
|
||||
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index 5e15c79b94..66ac9b6cc5 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
|
||||
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
|
||||
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
QTAILQ_INIT(&g->cmdq);
|
||||
QTAILQ_INIT(&g->fenceq);
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index 55902e1df7..4e76d6b191 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
|
||||
ahci_write_fis_d2h(ad);
|
||||
|
||||
if (ad->port_regs.cmd_issue && !ad->check_bh) {
|
||||
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
|
||||
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
|
||||
+ &ad->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(ad->check_bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
|
||||
index 303fcd7235..2480455372 100644
|
||||
--- a/hw/ide/ahci_internal.h
|
||||
+++ b/hw/ide/ahci_internal.h
|
||||
@@ -321,6 +321,7 @@ struct AHCIDevice {
|
||||
bool init_d2h_sent;
|
||||
AHCICmdHdr *cur_cmd;
|
||||
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct AHCIPCIState {
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 45d14a25e9..de48ff9f86 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
|
||||
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
|
||||
{
|
||||
IDEState *s = opaque;
|
||||
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
|
||||
TrimAIOCB *iocb;
|
||||
|
||||
/* Paired with a decrement in ide_trim_bh_cb() */
|
||||
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(
|
||||
|
||||
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
|
||||
iocb->s = s;
|
||||
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
|
||||
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
iocb->ret = 0;
|
||||
iocb->qiov = qiov;
|
||||
iocb->i = -1;
|
||||
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
|
||||
index 632c03779c..082c6980ad 100644
|
||||
--- a/hw/misc/imx_rngc.c
|
||||
+++ b/hw/misc/imx_rngc.c
|
||||
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_mmio(sbd, &s->iomem);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
|
||||
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
|
||||
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void imx_rngc_reset(DeviceState *dev)
|
||||
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
|
||||
index 43bb1f56ba..80a789f32b 100644
|
||||
--- a/hw/misc/macio/mac_dbdma.c
|
||||
+++ b/hw/misc/macio/mac_dbdma.c
|
||||
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
DBDMAState *s = MAC_DBDMA(dev);
|
||||
|
||||
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 53e1c32643..447f669921 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
|
||||
virtio_net_handle_tx_bh);
|
||||
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
|
||||
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
n->vqs[index].tx_waiting = 0;
|
||||
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
|
||||
index ac24eeb5ed..e5a468975e 100644
|
||||
--- a/hw/nvme/ctrl.c
|
||||
+++ b/hw/nvme/ctrl.c
|
||||
@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
|
||||
}
|
||||
|
||||
- sq->bh = qemu_bh_new(nvme_process_sq, sq);
|
||||
+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
|
||||
+ &DEVICE(sq->ctrl)->mem_reentrancy_guard);
|
||||
|
||||
if (n->dbbuf_enabled) {
|
||||
sq->db_addr = n->dbbuf_dbs + (sqid << 3);
|
||||
@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
|
||||
}
|
||||
}
|
||||
n->cq[cqid] = cq;
|
||||
- cq->bh = qemu_bh_new(nvme_post_cqes, cq);
|
||||
+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
|
||||
+ &DEVICE(cq->ctrl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
|
||||
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
|
||||
index c485da792c..3de288b454 100644
|
||||
--- a/hw/scsi/mptsas.c
|
||||
+++ b/hw/scsi/mptsas.c
|
||||
@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
}
|
||||
s->max_devices = MPTSAS_NUM_PORTS;
|
||||
|
||||
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
|
||||
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||
index c97176110c..3c20b47ad0 100644
|
||||
--- a/hw/scsi/scsi-bus.c
|
||||
+++ b/hw/scsi/scsi-bus.c
|
||||
@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||
AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||
/* The reference is dropped in scsi_dma_restart_bh.*/
|
||||
object_ref(OBJECT(s));
|
||||
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(s->bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
|
||||
index fa76696855..4de34536e9 100644
|
||||
--- a/hw/scsi/vmw_pvscsi.c
|
||||
+++ b/hw/scsi/vmw_pvscsi.c
|
||||
@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
|
||||
}
|
||||
|
||||
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
|
||||
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
|
||||
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
|
||||
/* override default SCSI bus hotplug-handler, with pvscsi's one */
|
||||
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
|
||||
index 88f99c05d5..f013ded91e 100644
|
||||
--- a/hw/usb/dev-uas.c
|
||||
+++ b/hw/usb/dev-uas.c
|
||||
@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
QTAILQ_INIT(&uas->results);
|
||||
QTAILQ_INIT(&uas->requests);
|
||||
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
|
||||
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
|
||||
+ &d->mem_reentrancy_guard);
|
||||
|
||||
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
|
||||
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
|
||||
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
|
||||
index 8755e9cbb0..a0c4e782b2 100644
|
||||
--- a/hw/usb/hcd-dwc2.c
|
||||
+++ b/hw/usb/hcd-dwc2.c
|
||||
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
|
||||
s->fi = USB_FRMINTVL - 1;
|
||||
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
}
|
||||
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
|
||||
index d4da8dcb8d..c930c60921 100644
|
||||
--- a/hw/usb/hcd-ehci.c
|
||||
+++ b/hw/usb/hcd-ehci.c
|
||||
@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
|
||||
}
|
||||
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
s->device = dev;
|
||||
|
||||
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
|
||||
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
|
||||
index 8ac1175ad2..77baaa7a6b 100644
|
||||
--- a/hw/usb/hcd-uhci.c
|
||||
+++ b/hw/usb/hcd-uhci.c
|
||||
@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
|
||||
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
|
||||
}
|
||||
}
|
||||
- s->bh = qemu_bh_new(uhci_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
|
||||
s->num_ports_vmstate = NB_PORTS;
|
||||
QTAILQ_INIT(&s->queues);
|
||||
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
|
||||
index 176868d345..f500db85ab 100644
|
||||
--- a/hw/usb/host-libusb.c
|
||||
+++ b/hw/usb/host-libusb.c
|
||||
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
|
||||
static void usb_host_nodev(USBHostDevice *s)
|
||||
{
|
||||
if (!s->bh_nodev) {
|
||||
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
|
||||
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(s->bh_nodev);
|
||||
}
|
||||
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
|
||||
USBHostDevice *dev = opaque;
|
||||
|
||||
if (!dev->bh_postld) {
|
||||
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
|
||||
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(dev->bh_postld);
|
||||
dev->bh_postld_pending = true;
|
||||
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
|
||||
index fd7df599bc..39fbaaab16 100644
|
||||
--- a/hw/usb/redirect.c
|
||||
+++ b/hw/usb/redirect.c
|
||||
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
|
||||
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
|
||||
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
|
||||
|
||||
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
|
||||
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
|
||||
index 66cb3f7c24..38ee660a30 100644
|
||||
--- a/hw/usb/xen-usb.c
|
||||
+++ b/hw/usb/xen-usb.c
|
||||
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
|
||||
|
||||
QTAILQ_INIT(&usbif->req_free_q);
|
||||
QSIMPLEQ_INIT(&usbif->hotplug_q);
|
||||
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
|
||||
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static int usbback_free(struct XenLegacyDevice *xendev)
|
||||
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
|
||||
index 43092aa634..5186e831dd 100644
|
||||
--- a/hw/virtio/virtio-balloon.c
|
||||
+++ b/hw/virtio/virtio-balloon.c
|
||||
@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
|
||||
precopy_add_notifier(&s->free_page_hint_notify);
|
||||
|
||||
object_ref(OBJECT(s->iothread));
|
||||
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
|
||||
- virtio_ballloon_get_free_page_hints, s);
|
||||
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
|
||||
+ virtio_ballloon_get_free_page_hints, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 802e1b9659..2fe804510f 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
|
||||
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,141 +0,0 @@
|
||||
From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Mon, 29 May 2023 14:21:08 -0400
|
||||
Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
|
||||
controller (CVE-2023-0330)
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 165: memory: prevent dma-reentracy issues
|
||||
RH-Jira: RHEL-516
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-516
|
||||
Upstream: Merged
|
||||
CVE: CVE-2023-2680
|
||||
|
||||
commit b987718bbb1d0eabf95499b976212dd5f0120d75
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon May 22 11:10:11 2023 +0200
|
||||
|
||||
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
|
||||
|
||||
We cannot use the generic reentrancy guard in the LSI code, so
|
||||
we have to manually prevent endless reentrancy here. The problematic
|
||||
lsi_execute_script() function has already a way to detect whether
|
||||
too many instructions have been executed - we just have to slightly
|
||||
change the logic here that it also takes into account if the function
|
||||
has been called too often in a reentrant way.
|
||||
|
||||
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
|
||||
patch by Mauro Matteo Cascella.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 23 +++++++++++++++------
|
||||
tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 50 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 048436352b..f7d45b0b20 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
+ static int reentrancy_level;
|
||||
+
|
||||
+ reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- if (++insn_processed > LSI_MAX_INSN) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
+ /*
|
||||
+ * Some windows drivers make the device spin waiting for a memory location
|
||||
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
+ * assume this is the case and force an unexpected device disconnect. This
|
||||
+ * is apparently sufficient to beat the drivers into submission.
|
||||
+ *
|
||||
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
+ * trigger itself again and again. Avoid this problem by stopping after
|
||||
+ * being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
+ * which should be enough for all valid use cases).
|
||||
+ */
|
||||
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1596,6 +1605,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
+
|
||||
+ reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
index 2012bd54b7..1b55928b9f 100644
|
||||
--- a/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
@@ -8,6 +8,36 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "libqtest.h"
|
||||
|
||||
+/*
|
||||
+ * This used to trigger a DMA reentrancy issue
|
||||
+ * leading to memory corruption bugs like stack
|
||||
+ * overflow or use-after-free
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
+ */
|
||||
+static void test_lsi_dma_reentrancy(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 512M -nodefaults "
|
||||
+ "-blockdev driver=null-co,node-name=null0 "
|
||||
+ "-device lsi53c810 -device scsi-cd,drive=null0");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
|
||||
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
|
||||
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
|
||||
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
|
||||
+ qtest_writel(s, 0xff000000, 0xc0000024);
|
||||
+ qtest_writel(s, 0xff000114, 0x00000080);
|
||||
+ qtest_writel(s, 0xff00012c, 0xff000000);
|
||||
+ qtest_writel(s, 0xff000004, 0xff000114);
|
||||
+ qtest_writel(s, 0xff000008, 0xff100014);
|
||||
+ qtest_writel(s, 0xff10002f, 0x000000ff);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This used to trigger a UAF in lsi_do_msgout()
|
||||
* https://gitlab.com/qemu-project/qemu/-/issues/972
|
||||
@@ -124,5 +154,8 @@ int main(int argc, char **argv)
|
||||
qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
|
||||
test_lsi_do_msgout_cancel_req);
|
||||
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
|
||||
+ test_lsi_dma_reentrancy);
|
||||
+
|
||||
return g_test_run();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,76 +0,0 @@
|
||||
From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit 0ddcb39c9357
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Fri Jun 30 16:36:08 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Sanitize capability pointer
|
||||
|
||||
Coverity reports a tainted scalar when traversing the capabilities
|
||||
chain (CID 1516589). In practice I've never seen a device with a
|
||||
chain so broken as to cause an issue, but it's also pretty easy to
|
||||
sanitize.
|
||||
|
||||
Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques")
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index 0ed2fcd531..f4ff836805 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
.set = set_nv_gpudirect_clique_id,
|
||||
};
|
||||
|
||||
+static bool is_valid_std_cap_offset(uint8_t pos)
|
||||
+{
|
||||
+ return (pos >= PCI_STD_HEADER_SIZEOF &&
|
||||
+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
|
||||
+}
|
||||
+
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
*/
|
||||
ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
- if (ret != 1 || !tmp) {
|
||||
+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
|
||||
error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
d4_conflict = true;
|
||||
}
|
||||
tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
- } while (tmp);
|
||||
+ } while (is_valid_std_cap_offset(tmp));
|
||||
|
||||
if (!c8_conflict) {
|
||||
pos = 0xC8;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,110 +0,0 @@
|
||||
From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Wed, 12 Jul 2023 17:46:57 +0200
|
||||
Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for
|
||||
GPUDirect Cliques
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 179: vfio: live migration support
|
||||
RH-Bugzilla: 2192818
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2192818
|
||||
|
||||
commit f6b30c1984f7
|
||||
Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
Date: Thu Jun 8 12:05:07 2023 -0600
|
||||
|
||||
hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
|
||||
|
||||
NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset
|
||||
previously reserved for use by hypervisors to implement the GPUDirect
|
||||
Cliques capability. A revised specification provides an alternate
|
||||
location. Add a config space walk to the quirk to check for conflicts,
|
||||
allowing us to fall back to the new location or generate an error at the
|
||||
quirk setup rather than when the real conflicting capability is added
|
||||
should there be no available location.
|
||||
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
|
||||
index f0147a050a..0ed2fcd531 100644
|
||||
--- a/hw/vfio/pci-quirks.c
|
||||
+++ b/hw/vfio/pci-quirks.c
|
||||
@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
|
||||
* +---------------------------------+---------------------------------+
|
||||
*
|
||||
* https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
|
||||
+ *
|
||||
+ * Specification for Turing and later GPU architectures:
|
||||
+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
|
||||
*/
|
||||
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = {
|
||||
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
- int ret, pos = 0xC8;
|
||||
+ int ret, pos;
|
||||
+ bool c8_conflict = false, d4_conflict = false;
|
||||
+ uint8_t tmp;
|
||||
|
||||
if (vdev->nv_gpudirect_clique == 0xFF) {
|
||||
return 0;
|
||||
@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Per the updated specification above, it's recommended to use offset
|
||||
+ * D4h for Turing and later GPU architectures due to a conflict of the
|
||||
+ * MSI-X capability at C8h. We don't know how to determine the GPU
|
||||
+ * architecture, instead we walk the capability chain to mark conflicts
|
||||
+ * and choose one or error based on the result.
|
||||
+ *
|
||||
+ * NB. Cap list head in pdev->config is already cleared, read from device.
|
||||
+ */
|
||||
+ ret = pread(vdev->vbasedev.fd, &tmp, 1,
|
||||
+ vdev->config_offset + PCI_CAPABILITY_LIST);
|
||||
+ if (ret != 1 || !tmp) {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ if (tmp == 0xC8) {
|
||||
+ c8_conflict = true;
|
||||
+ } else if (tmp == 0xD4) {
|
||||
+ d4_conflict = true;
|
||||
+ }
|
||||
+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
|
||||
+ } while (tmp);
|
||||
+
|
||||
+ if (!c8_conflict) {
|
||||
+ pos = 0xC8;
|
||||
+ } else if (!d4_conflict) {
|
||||
+ pos = 0xD4;
|
||||
+ } else {
|
||||
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,108 @@
|
||||
From c554f8768a18ceba173aedbd582c1cae43a41e2c Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 18 Jun 2024 14:19:58 +0200
|
||||
Subject: [PATCH 1/2] hw/virtio: Fix the de-initialization of vhost-user
|
||||
devices
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 255: hw/virtio: Fix the de-initialization of vhost-user devices
|
||||
RH-Jira: RHEL-40708
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] c7815a249ec135993f45934cab1c1f2c038b80ea (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-40708
|
||||
|
||||
The unrealize functions of the various vhost-user devices are
|
||||
calling the corresponding vhost_*_set_status() functions with a
|
||||
status of 0 to shut down the device correctly.
|
||||
|
||||
Now these vhost_*_set_status() functions all follow this scheme:
|
||||
|
||||
bool should_start = virtio_device_should_start(vdev, status);
|
||||
|
||||
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (should_start) {
|
||||
/* ... do the initialization stuff ... */
|
||||
} else {
|
||||
/* ... do the cleanup stuff ... */
|
||||
}
|
||||
|
||||
The problem here is virtio_device_should_start(vdev, 0) currently
|
||||
always returns "true" since it internally only looks at vdev->started
|
||||
instead of looking at the "status" parameter. Thus once the device
|
||||
got started once, virtio_device_should_start() always returns true
|
||||
and thus the vhost_*_set_status() functions return early, without
|
||||
ever doing any clean-up when being called with status == 0. This
|
||||
causes e.g. problems when trying to hot-plug and hot-unplug a vhost
|
||||
user device multiple times since the de-initialization step is
|
||||
completely skipped during the unplug operation.
|
||||
|
||||
This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
|
||||
vm_running check to virtio_device_started") which replaced
|
||||
|
||||
should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
|
||||
|
||||
with
|
||||
|
||||
should_start = virtio_device_started(vdev, status);
|
||||
|
||||
which later got replaced by virtio_device_should_start(). This blocked
|
||||
the possibility to set should_start to false in case the status flag
|
||||
VIRTIO_CONFIG_S_DRIVER_OK was not set.
|
||||
|
||||
Fix it by adjusting the virtio_device_should_start() function to
|
||||
only consider the status flag instead of vdev->started. Since this
|
||||
function is only used in the various vhost_*_set_status() functions
|
||||
for exactly the same purpose, it should be fine to fix it in this
|
||||
central place there without any risk to change the behavior of other
|
||||
code.
|
||||
|
||||
Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-40708
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20240618121958.88673-1-thuth@redhat.com>
|
||||
Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
include/hw/virtio/virtio.h | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
||||
index 7d5ffdc145..2eafad17b8 100644
|
||||
--- a/include/hw/virtio/virtio.h
|
||||
+++ b/include/hw/virtio/virtio.h
|
||||
@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
|
||||
* @vdev - the VirtIO device
|
||||
* @status - the devices status bits
|
||||
*
|
||||
- * This is similar to virtio_device_started() but also encapsulates a
|
||||
- * check on the VM status which would prevent a device starting
|
||||
- * anyway.
|
||||
+ * This is similar to virtio_device_started() but ignores vdev->started
|
||||
+ * and also encapsulates a check on the VM status which would prevent a
|
||||
+ * device from starting anyway.
|
||||
*/
|
||||
static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
|
||||
{
|
||||
@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
|
||||
return false;
|
||||
}
|
||||
|
||||
- return virtio_device_started(vdev, status);
|
||||
+ return status & VIRTIO_CONFIG_S_DRIVER_OK;
|
||||
}
|
||||
|
||||
static inline void virtio_set_started(VirtIODevice *vdev, bool started)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,62 +0,0 @@
|
||||
From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 17 Jul 2023 18:21:26 +0200
|
||||
Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in
|
||||
virtio_iommu_handle_command()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes
|
||||
RH-Bugzilla: 2229133
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133
|
||||
|
||||
In the virtio_iommu_handle_command() when a PROBE request is handled,
|
||||
output_size takes a value greater than the tail size and on a subsequent
|
||||
iteration we can get a stack out-of-bounds access. Initialize the
|
||||
output_size on each iteration.
|
||||
|
||||
The issue was found with ASAN. Credits to:
|
||||
Yiming Tao(Zhejiang University)
|
||||
Gaoning Pan(Zhejiang University)
|
||||
|
||||
Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request")
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
|
||||
Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio-iommu.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
|
||||
index 421e2a944f..17ce630200 100644
|
||||
--- a/hw/virtio/virtio-iommu.c
|
||||
+++ b/hw/virtio/virtio-iommu.c
|
||||
@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
||||
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
|
||||
struct virtio_iommu_req_head head;
|
||||
struct virtio_iommu_req_tail tail = {};
|
||||
- size_t output_size = sizeof(tail), sz;
|
||||
VirtQueueElement *elem;
|
||||
unsigned int iov_cnt;
|
||||
struct iovec *iov;
|
||||
void *buf = NULL;
|
||||
+ size_t sz;
|
||||
|
||||
for (;;) {
|
||||
+ size_t output_size = sizeof(tail);
|
||||
+
|
||||
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
|
||||
if (!elem) {
|
||||
return;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,68 @@
|
||||
From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu, 9 May 2024 19:00:39 +0200
|
||||
Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends
|
||||
on other components
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm)
|
||||
|
||||
fw_cfg.c and vapic.c are currently included unconditionally but
|
||||
depend on other components. vapic.c depends on the local APIC,
|
||||
while fw_cfg.c includes a piece of AML builder code that depends
|
||||
on CONFIG_ACPI.
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||
Message-ID: <20240509170044.190795-9-pbonzini@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/fw_cfg.c | 2 ++
|
||||
hw/i386/meson.build | 2 +-
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
|
||||
index 283c3f4c16..7f97d40616 100644
|
||||
--- a/hw/i386/fw_cfg.c
|
||||
+++ b/hw/i386/fw_cfg.c
|
||||
@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg)
|
||||
fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_ACPI
|
||||
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
|
||||
{
|
||||
/*
|
||||
@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
|
||||
aml_append(dev, aml_name_decl("_CRS", crs));
|
||||
aml_append(scope, dev);
|
||||
}
|
||||
+#endif
|
||||
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
|
||||
index d8b70ef3e9..d9da676038 100644
|
||||
--- a/hw/i386/meson.build
|
||||
+++ b/hw/i386/meson.build
|
||||
@@ -1,12 +1,12 @@
|
||||
i386_ss = ss.source_set()
|
||||
i386_ss.add(files(
|
||||
'fw_cfg.c',
|
||||
- 'vapic.c',
|
||||
'e820_memory_layout.c',
|
||||
'multiboot.c',
|
||||
'x86.c',
|
||||
))
|
||||
|
||||
+i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c'))
|
||||
i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
|
||||
if_false: files('x86-iommu-stub.c'))
|
||||
i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,40 @@
|
||||
From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:23 -0500
|
||||
Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SNP guests will rely on this bit to determine certain feature support.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 489c853b42..13737cd703 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
+ *eax |= sev_snp_enabled() ? 0x10 : 0;
|
||||
*ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
*ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,52 +0,0 @@
|
||||
From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 14:29:15 -0400
|
||||
Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID
|
||||
0x8000001F is set
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214839
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839
|
||||
|
||||
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:30 2022 -0500
|
||||
|
||||
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
|
||||
|
||||
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
|
||||
associated with fields being set.
|
||||
|
||||
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 839706b430..4ac3046313 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
- *ebx = sev_get_cbit_position();
|
||||
- *ebx |= sev_get_reduced_phys_bits() << 6;
|
||||
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,145 @@
|
||||
From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:42 -0500
|
||||
Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for
|
||||
KVM_HC_MAP_GPA_RANGE
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for
|
||||
private/shared memory attribute updates requested by the guest.
|
||||
Implement handling for that use-case along with some basic
|
||||
infrastructure for enabling specific hypercall events.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++
|
||||
target/i386/kvm/kvm_i386.h | 1 +
|
||||
target/i386/kvm/trace-events | 1 +
|
||||
3 files changed, 57 insertions(+)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 75e75d9772..2935e3931a 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
+#include <linux/kvm_para.h>
|
||||
#include "standard-headers/asm-x86/kvm_para.h"
|
||||
#include "hw/xen/interface/arch-x86/cpuid.h"
|
||||
|
||||
@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms)
|
||||
return kvm_type;
|
||||
}
|
||||
|
||||
+bool kvm_enable_hypercall(uint64_t enable_mask)
|
||||
+{
|
||||
+ KVMState *s = KVM_STATE(current_accel());
|
||||
+
|
||||
+ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask);
|
||||
+}
|
||||
+
|
||||
bool kvm_has_smm(void)
|
||||
{
|
||||
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
|
||||
@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void)
|
||||
return ecx & CPUID_EXT_VMX;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
|
||||
+ * to service guest-initiated memory attribute update requests so that
|
||||
+ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
|
||||
+ * backed by the private memory pool provided by guest_memfd, and as such
|
||||
+ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
|
||||
+ *
|
||||
+ * Other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live
|
||||
+ * migration, are not implemented here currently.
|
||||
+ *
|
||||
+ * For the guest_memfd use-case, these exits will generally be synthesized
|
||||
+ * by KVM based on platform-specific hypercalls, like GHCB requests in the
|
||||
+ * case of SEV-SNP, and not issued directly within the guest through the
|
||||
+ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
|
||||
+ * not actually advertised to guests via the KVM CPUID feature bit, as
|
||||
+ * opposed to SEV live migration where it would be. Since it is unlikely the
|
||||
+ * SEV live migration use-case would be useful for guest-memfd backed guests,
|
||||
+ * because private/shared page tracking is already provided through other
|
||||
+ * means, these 2 use-cases should be treated as being mutually-exclusive.
|
||||
+ */
|
||||
+static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
|
||||
+{
|
||||
+ uint64_t gpa, size, attributes;
|
||||
+
|
||||
+ if (!machine_require_guest_memfd(current_machine))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ gpa = run->hypercall.args[0];
|
||||
+ size = run->hypercall.args[1] * TARGET_PAGE_SIZE;
|
||||
+ attributes = run->hypercall.args[2];
|
||||
+
|
||||
+ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
|
||||
+
|
||||
+ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
|
||||
+}
|
||||
+
|
||||
+static int kvm_handle_hypercall(struct kvm_run *run)
|
||||
+{
|
||||
+ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
|
||||
+ return kvm_handle_hc_map_gpa_range(run);
|
||||
+
|
||||
+ return -EINVAL;
|
||||
+}
|
||||
+
|
||||
#define VMX_INVALID_GUEST_STATE 0x80000021
|
||||
|
||||
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
ret = kvm_xen_handle_exit(cpu, &run->xen);
|
||||
break;
|
||||
#endif
|
||||
+ case KVM_EXIT_HYPERCALL:
|
||||
+ ret = kvm_handle_hypercall(run);
|
||||
+ break;
|
||||
default:
|
||||
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
|
||||
ret = -1;
|
||||
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
|
||||
index 6b44844d95..34fc60774b 100644
|
||||
--- a/target/i386/kvm/kvm_i386.h
|
||||
+++ b/target/i386/kvm/kvm_i386.h
|
||||
@@ -33,6 +33,7 @@
|
||||
bool kvm_has_smm(void);
|
||||
bool kvm_enable_x2apic(void);
|
||||
bool kvm_hv_vpindex_settable(void);
|
||||
+bool kvm_enable_hypercall(uint64_t enable_mask);
|
||||
|
||||
bool kvm_enable_sgx_provisioning(KVMState *s);
|
||||
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
|
||||
diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events
|
||||
index b365a8e8e2..74a6234ff7 100644
|
||||
--- a/target/i386/kvm/trace-events
|
||||
+++ b/target/i386/kvm/trace-events
|
||||
@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %"
|
||||
kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d"
|
||||
kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d"
|
||||
kvm_x86_update_msi_routes(int num) "Updated %d MSI routes"
|
||||
+kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64
|
||||
|
||||
# xen-emu.c
|
||||
kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,536 @@
|
||||
From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001
|
||||
From: Sean Christopherson <sean.j.christopherson@intel.com>
|
||||
Date: Thu, 29 Feb 2024 01:36:43 -0500
|
||||
Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to
|
||||
separate helper
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Move the architectural (for lack of a better term) CPUID leaf generation
|
||||
to a separate helper so that the generation code can be reused by TDX,
|
||||
which needs to generate a canonical VM-scoped configuration.
|
||||
|
||||
For now this is just a cleanup, so keep the function static.
|
||||
|
||||
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com>
|
||||
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 417 +++++++++++++++++++++---------------------
|
||||
1 file changed, 211 insertions(+), 206 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 739f33db47..5f30b649a0 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env)
|
||||
}
|
||||
}
|
||||
|
||||
-int kvm_arch_init_vcpu(CPUState *cs)
|
||||
+static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
|
||||
+ struct kvm_cpuid_entry2 *entries,
|
||||
+ uint32_t cpuid_i)
|
||||
{
|
||||
- struct {
|
||||
- struct kvm_cpuid2 cpuid;
|
||||
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
- } cpuid_data;
|
||||
- /*
|
||||
- * The kernel defines these structs with padding fields so there
|
||||
- * should be no extra padding in our cpuid_data struct.
|
||||
- */
|
||||
- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
|
||||
- sizeof(struct kvm_cpuid2) +
|
||||
- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
|
||||
-
|
||||
- X86CPU *cpu = X86_CPU(cs);
|
||||
- CPUX86State *env = &cpu->env;
|
||||
- uint32_t limit, i, j, cpuid_i;
|
||||
+ uint32_t limit, i, j;
|
||||
uint32_t unused;
|
||||
struct kvm_cpuid_entry2 *c;
|
||||
- uint32_t signature[3];
|
||||
- int kvm_base = KVM_CPUID_SIGNATURE;
|
||||
- int max_nested_state_len;
|
||||
- int r;
|
||||
- Error *local_err = NULL;
|
||||
-
|
||||
- memset(&cpuid_data, 0, sizeof(cpuid_data));
|
||||
-
|
||||
- cpuid_i = 0;
|
||||
-
|
||||
- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
|
||||
-
|
||||
- r = kvm_arch_set_tsc_khz(cs);
|
||||
- if (r < 0) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- /* vcpu's TSC frequency is either specified by user, or following
|
||||
- * the value used by KVM if the former is not present. In the
|
||||
- * latter case, we query it from KVM and record in env->tsc_khz,
|
||||
- * so that vcpu's TSC frequency can be migrated later via this field.
|
||||
- */
|
||||
- if (!env->tsc_khz) {
|
||||
- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
|
||||
- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
|
||||
- -ENOTSUP;
|
||||
- if (r > 0) {
|
||||
- env->tsc_khz = r;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
|
||||
-
|
||||
- /*
|
||||
- * kvm_hyperv_expand_features() is called here for the second time in case
|
||||
- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
|
||||
- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
|
||||
- * check which Hyper-V enlightenments are supported and which are not, we
|
||||
- * can still proceed and check/expand Hyper-V enlightenments here so legacy
|
||||
- * behavior is preserved.
|
||||
- */
|
||||
- if (!kvm_hyperv_expand_features(cpu, &local_err)) {
|
||||
- error_report_err(local_err);
|
||||
- return -ENOSYS;
|
||||
- }
|
||||
-
|
||||
- if (hyperv_enabled(cpu)) {
|
||||
- r = hyperv_init_vcpu(cpu);
|
||||
- if (r) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
|
||||
- kvm_base = KVM_CPUID_SIGNATURE_NEXT;
|
||||
- has_msr_hv_hypercall = true;
|
||||
- }
|
||||
-
|
||||
- if (cs->kvm_state->xen_version) {
|
||||
-#ifdef CONFIG_XEN_EMU
|
||||
- struct kvm_cpuid_entry2 *xen_max_leaf;
|
||||
-
|
||||
- memcpy(signature, "XenVMMXenVMM", 12);
|
||||
-
|
||||
- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_SIGNATURE;
|
||||
- c->eax = kvm_base + XEN_CPUID_TIME;
|
||||
- c->ebx = signature[0];
|
||||
- c->ecx = signature[1];
|
||||
- c->edx = signature[2];
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_VENDOR;
|
||||
- c->eax = cs->kvm_state->xen_version;
|
||||
- c->ebx = 0;
|
||||
- c->ecx = 0;
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_HVM_MSR;
|
||||
- /* Number of hypercall-transfer pages */
|
||||
- c->eax = 1;
|
||||
- /* Hypercall MSR base address */
|
||||
- if (hyperv_enabled(cpu)) {
|
||||
- c->ebx = XEN_HYPERCALL_MSR_HYPERV;
|
||||
- kvm_xen_init(cs->kvm_state, c->ebx);
|
||||
- } else {
|
||||
- c->ebx = XEN_HYPERCALL_MSR;
|
||||
- }
|
||||
- c->ecx = 0;
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_TIME;
|
||||
- c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
|
||||
- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
|
||||
- /* default=0 (emulate if necessary) */
|
||||
- c->ebx = 0;
|
||||
- /* guest tsc frequency */
|
||||
- c->ecx = env->user_tsc_khz;
|
||||
- /* guest tsc incarnation (migration count) */
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_HVM;
|
||||
- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
|
||||
- c->function = kvm_base + XEN_CPUID_HVM;
|
||||
-
|
||||
- if (cpu->xen_vapic) {
|
||||
- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
|
||||
- c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
|
||||
- }
|
||||
-
|
||||
- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
|
||||
-
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
|
||||
- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
|
||||
- c->ebx = cs->cpu_index;
|
||||
- }
|
||||
-
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
|
||||
- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- r = kvm_xen_init_vcpu(cs);
|
||||
- if (r) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- kvm_base += 0x100;
|
||||
-#else /* CONFIG_XEN_EMU */
|
||||
- /* This should never happen as kvm_arch_init() would have died first. */
|
||||
- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
|
||||
- abort();
|
||||
-#endif
|
||||
- } else if (cpu->expose_kvm) {
|
||||
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = KVM_CPUID_SIGNATURE | kvm_base;
|
||||
- c->eax = KVM_CPUID_FEATURES | kvm_base;
|
||||
- c->ebx = signature[0];
|
||||
- c->ecx = signature[1];
|
||||
- c->edx = signature[2];
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = KVM_CPUID_FEATURES | kvm_base;
|
||||
- c->eax = env->features[FEAT_KVM];
|
||||
- c->edx = env->features[FEAT_KVM_HINTS];
|
||||
- }
|
||||
|
||||
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
- if (cpu->kvm_pv_enforce_cpuid) {
|
||||
- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
|
||||
- if (r < 0) {
|
||||
- fprintf(stderr,
|
||||
- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
|
||||
- strerror(-r));
|
||||
- abort();
|
||||
- }
|
||||
- }
|
||||
-
|
||||
for (i = 0; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported level value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
-
|
||||
+ c = &entries[cpuid_i++];
|
||||
switch (i) {
|
||||
case 2: {
|
||||
/* Keep reading function 2 till all the input is received */
|
||||
@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
for (j = 1; j < times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
continue;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x12:
|
||||
@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
}
|
||||
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x12,ecx:0x%x)\n", j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
for (j = 1; j <= times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->index = j;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0x80000000; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 0x8000001d:
|
||||
@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
break;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0xC0000000; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
}
|
||||
}
|
||||
|
||||
+ return cpuid_i;
|
||||
+
|
||||
+full:
|
||||
+ fprintf(stderr, "cpuid_data is full, no space for "
|
||||
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
+ abort();
|
||||
+}
|
||||
+
|
||||
+int kvm_arch_init_vcpu(CPUState *cs)
|
||||
+{
|
||||
+ struct {
|
||||
+ struct kvm_cpuid2 cpuid;
|
||||
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
+ } cpuid_data;
|
||||
+ /*
|
||||
+ * The kernel defines these structs with padding fields so there
|
||||
+ * should be no extra padding in our cpuid_data struct.
|
||||
+ */
|
||||
+ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
|
||||
+ sizeof(struct kvm_cpuid2) +
|
||||
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
|
||||
+
|
||||
+ X86CPU *cpu = X86_CPU(cs);
|
||||
+ CPUX86State *env = &cpu->env;
|
||||
+ uint32_t cpuid_i;
|
||||
+ struct kvm_cpuid_entry2 *c;
|
||||
+ uint32_t signature[3];
|
||||
+ int kvm_base = KVM_CPUID_SIGNATURE;
|
||||
+ int max_nested_state_len;
|
||||
+ int r;
|
||||
+ Error *local_err = NULL;
|
||||
+
|
||||
+ memset(&cpuid_data, 0, sizeof(cpuid_data));
|
||||
+
|
||||
+ cpuid_i = 0;
|
||||
+
|
||||
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
|
||||
+
|
||||
+ r = kvm_arch_set_tsc_khz(cs);
|
||||
+ if (r < 0) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ /* vcpu's TSC frequency is either specified by user, or following
|
||||
+ * the value used by KVM if the former is not present. In the
|
||||
+ * latter case, we query it from KVM and record in env->tsc_khz,
|
||||
+ * so that vcpu's TSC frequency can be migrated later via this field.
|
||||
+ */
|
||||
+ if (!env->tsc_khz) {
|
||||
+ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
|
||||
+ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
|
||||
+ -ENOTSUP;
|
||||
+ if (r > 0) {
|
||||
+ env->tsc_khz = r;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
|
||||
+
|
||||
+ /*
|
||||
+ * kvm_hyperv_expand_features() is called here for the second time in case
|
||||
+ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
|
||||
+ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
|
||||
+ * check which Hyper-V enlightenments are supported and which are not, we
|
||||
+ * can still proceed and check/expand Hyper-V enlightenments here so legacy
|
||||
+ * behavior is preserved.
|
||||
+ */
|
||||
+ if (!kvm_hyperv_expand_features(cpu, &local_err)) {
|
||||
+ error_report_err(local_err);
|
||||
+ return -ENOSYS;
|
||||
+ }
|
||||
+
|
||||
+ if (hyperv_enabled(cpu)) {
|
||||
+ r = hyperv_init_vcpu(cpu);
|
||||
+ if (r) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
|
||||
+ kvm_base = KVM_CPUID_SIGNATURE_NEXT;
|
||||
+ has_msr_hv_hypercall = true;
|
||||
+ }
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version) {
|
||||
+#ifdef CONFIG_XEN_EMU
|
||||
+ struct kvm_cpuid_entry2 *xen_max_leaf;
|
||||
+
|
||||
+ memcpy(signature, "XenVMMXenVMM", 12);
|
||||
+
|
||||
+ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_SIGNATURE;
|
||||
+ c->eax = kvm_base + XEN_CPUID_TIME;
|
||||
+ c->ebx = signature[0];
|
||||
+ c->ecx = signature[1];
|
||||
+ c->edx = signature[2];
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_VENDOR;
|
||||
+ c->eax = cs->kvm_state->xen_version;
|
||||
+ c->ebx = 0;
|
||||
+ c->ecx = 0;
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM_MSR;
|
||||
+ /* Number of hypercall-transfer pages */
|
||||
+ c->eax = 1;
|
||||
+ /* Hypercall MSR base address */
|
||||
+ if (hyperv_enabled(cpu)) {
|
||||
+ c->ebx = XEN_HYPERCALL_MSR_HYPERV;
|
||||
+ kvm_xen_init(cs->kvm_state, c->ebx);
|
||||
+ } else {
|
||||
+ c->ebx = XEN_HYPERCALL_MSR;
|
||||
+ }
|
||||
+ c->ecx = 0;
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_TIME;
|
||||
+ c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
|
||||
+ (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
|
||||
+ /* default=0 (emulate if necessary) */
|
||||
+ c->ebx = 0;
|
||||
+ /* guest tsc frequency */
|
||||
+ c->ecx = env->user_tsc_khz;
|
||||
+ /* guest tsc incarnation (migration count) */
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM;
|
||||
+ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM;
|
||||
+
|
||||
+ if (cpu->xen_vapic) {
|
||||
+ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
|
||||
+ c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
|
||||
+ }
|
||||
+
|
||||
+ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
|
||||
+ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
|
||||
+ c->ebx = cs->cpu_index;
|
||||
+ }
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
|
||||
+ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ r = kvm_xen_init_vcpu(cs);
|
||||
+ if (r) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ kvm_base += 0x100;
|
||||
+#else /* CONFIG_XEN_EMU */
|
||||
+ /* This should never happen as kvm_arch_init() would have died first. */
|
||||
+ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
|
||||
+ abort();
|
||||
+#endif
|
||||
+ } else if (cpu->expose_kvm) {
|
||||
+ memcpy(signature, "KVMKVMKVM\0\0\0", 12);
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = KVM_CPUID_SIGNATURE | kvm_base;
|
||||
+ c->eax = KVM_CPUID_FEATURES | kvm_base;
|
||||
+ c->ebx = signature[0];
|
||||
+ c->ecx = signature[1];
|
||||
+ c->edx = signature[2];
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = KVM_CPUID_FEATURES | kvm_base;
|
||||
+ c->eax = env->features[FEAT_KVM];
|
||||
+ c->edx = env->features[FEAT_KVM_HINTS];
|
||||
+ }
|
||||
+
|
||||
+ if (cpu->kvm_pv_enforce_cpuid) {
|
||||
+ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
|
||||
+ if (r < 0) {
|
||||
+ fprintf(stderr,
|
||||
+ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
|
||||
+ strerror(-r));
|
||||
+ abort();
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i);
|
||||
cpuid_data.cpuid.nent = cpuid_i;
|
||||
|
||||
if (((env->cpuid_version >> 8)&0xF) >= 6
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,91 @@
|
||||
From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu, 9 May 2024 19:00:40 +0200
|
||||
Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm)
|
||||
|
||||
There is no need to override these fields of MachineClass because they are
|
||||
already set to the right value in the superclass.
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||
Message-ID: <20240509170044.190795-10-pbonzini@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 3 ---
|
||||
hw/i386/x86.c | 6 +++---
|
||||
include/hw/i386/x86.h | 4 ----
|
||||
3 files changed, 3 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 660a59c63b..0aca0cc79e 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
mc->hotplug_allowed = pc_hotplug_allowed;
|
||||
- mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
|
||||
- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
|
||||
- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
mc->has_hotpluggable_cpus = true;
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index c61f4ebfa6..fcef652c1e 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
|
||||
numa_cpu_pre_plug(cpu_slot, dev, errp);
|
||||
}
|
||||
|
||||
-CpuInstanceProperties
|
||||
+static CpuInstanceProperties
|
||||
x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||
{
|
||||
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||
@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||
return possible_cpus->cpus[cpu_index].props;
|
||||
}
|
||||
|
||||
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
+static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
{
|
||||
X86CPUTopoIDs topo_ids;
|
||||
X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
return topo_ids.pkg_id % ms->numa_state->num_nodes;
|
||||
}
|
||||
|
||||
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
|
||||
+static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
unsigned int max_cpus = ms->smp.max_cpus;
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index d7b7d3f3ce..c2062db13f 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
|
||||
|
||||
void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
|
||||
void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
|
||||
-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
|
||||
- unsigned cpu_index);
|
||||
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
|
||||
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
|
||||
CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
|
||||
void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
|
||||
void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,116 @@
|
||||
From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri, 31 May 2024 12:44:44 +0200
|
||||
Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type
|
||||
for SNP guests
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM,
|
||||
or KVM_X86_SEV_ES_VM depending on the configuration and what
|
||||
the host kernel supports. SNP guests on the other hand can only
|
||||
ever use KVM_X86_SNP_VM, so split determination of VM type out
|
||||
into a separate class method that can be set accordingly for
|
||||
sev-guest vs. sev-snp-guest objects and add handling for SNP.
|
||||
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com>
|
||||
[Remove unnecessary function pointer declaration. - Paolo]
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 1 +
|
||||
target/i386/sev.c | 15 ++++++++++++---
|
||||
2 files changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 408568d053..75e75d9772 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -166,6 +166,7 @@ static const char *vm_type_name[] = {
|
||||
[KVM_X86_DEFAULT_VM] = "default",
|
||||
[KVM_X86_SEV_VM] = "SEV",
|
||||
[KVM_X86_SEV_ES_VM] = "SEV-ES",
|
||||
+ [KVM_X86_SNP_VM] = "SEV-SNP",
|
||||
};
|
||||
|
||||
bool kvm_is_vm_type_supported(int type)
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index c3daaf1ad5..072cc4f853 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -885,6 +885,11 @@ out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
|
||||
+static int sev_snp_kvm_type(X86ConfidentialGuest *cg)
|
||||
+{
|
||||
+ return KVM_X86_SNP_VM;
|
||||
+}
|
||||
+
|
||||
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
char *devname;
|
||||
@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
struct sev_user_data_status status = {};
|
||||
SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
+ X86ConfidentialGuestClass *x86_klass =
|
||||
+ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs);
|
||||
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
|
||||
@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
trace_kvm_sev_init();
|
||||
- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
+ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
|
||||
@@ -1441,10 +1448,8 @@ static void
|
||||
sev_common_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->kvm_init = sev_common_kvm_init;
|
||||
- x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
sev_common_get_sev_device,
|
||||
@@ -1529,10 +1534,12 @@ static void
|
||||
sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
klass->kvm_init = sev_kvm_init;
|
||||
+ x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
@@ -1770,8 +1777,10 @@ static void
|
||||
sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
+ x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,84 @@
|
||||
From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:20 -0500
|
||||
Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add a simple helper to check if the current guest type is SNP. Also have
|
||||
SNP-enabled imply that SEV-ES is enabled as well, and fix up any places
|
||||
where the sev_es_enabled() check is expecting a pure/non-SNP guest.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 13 ++++++++++++-
|
||||
target/i386/sev.h | 2 ++
|
||||
2 files changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index a81b3228d4..4edfedc139 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -325,12 +325,21 @@ sev_enabled(void)
|
||||
return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON);
|
||||
}
|
||||
|
||||
+bool
|
||||
+sev_snp_enabled(void)
|
||||
+{
|
||||
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
+
|
||||
+ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST);
|
||||
+}
|
||||
+
|
||||
bool
|
||||
sev_es_enabled(void)
|
||||
{
|
||||
ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
|
||||
- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
|
||||
+ return sev_snp_enabled() ||
|
||||
+ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
"support", __func__);
|
||||
goto err;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ if (sev_es_enabled() && !sev_snp_enabled()) {
|
||||
if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
|
||||
error_setg(errp, "%s: guest policy requires SEV-ES, but "
|
||||
"host SEV-ES support unavailable",
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index bedc667eeb..94295ee74f 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext {
|
||||
#ifdef CONFIG_SEV
|
||||
bool sev_enabled(void);
|
||||
bool sev_es_enabled(void);
|
||||
+bool sev_snp_enabled(void);
|
||||
#else
|
||||
#define sev_enabled() 0
|
||||
#define sev_es_enabled() 0
|
||||
+#define sev_snp_enabled() 0
|
||||
#endif
|
||||
|
||||
uint32_t sev_get_cbit_position(void);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,187 @@
|
||||
From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:28 -0500
|
||||
Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest
|
||||
launch data
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Process any queued up launch data and encrypt/measure it into the SNP
|
||||
guest instance prior to initial guest launch.
|
||||
|
||||
This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial
|
||||
update responses.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Co-developed-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++-
|
||||
target/i386/trace-events | 2 +
|
||||
2 files changed, 113 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index e89b87d2f5..ef2e592ca7 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -756,6 +756,76 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static const char *
|
||||
+snp_page_type_to_str(int type)
|
||||
+{
|
||||
+ switch (type) {
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid";
|
||||
+ default: return "unknown";
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
+ SevLaunchUpdateData *data)
|
||||
+{
|
||||
+ int ret, fw_error;
|
||||
+ struct kvm_sev_snp_launch_update update = {0};
|
||||
+
|
||||
+ if (!data->hva || !data->len) {
|
||||
+ error_report("SNP_LAUNCH_UPDATE called with invalid address "
|
||||
+ "/ length: %p / %lx",
|
||||
+ data->hva, data->len);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ update.uaddr = (__u64)(unsigned long)data->hva;
|
||||
+ update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
|
||||
+ update.len = data->len;
|
||||
+ update.type = data->type;
|
||||
+
|
||||
+ /*
|
||||
+ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private
|
||||
+ * memory attribute set in advance.
|
||||
+ */
|
||||
+ ret = kvm_set_memory_attributes_private(data->gpa, data->len);
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: failed to configure initial "
|
||||
+ "private guest memory");
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ while (update.len || ret == -EAGAIN) {
|
||||
+ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start <<
|
||||
+ TARGET_PAGE_BITS, update.len,
|
||||
+ snp_page_type_to_str(update.type));
|
||||
+
|
||||
+ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd,
|
||||
+ KVM_SEV_SNP_LAUNCH_UPDATE,
|
||||
+ &update, &fw_error);
|
||||
+ if (ret && ret != -EAGAIN) {
|
||||
+ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
|
||||
+ ret, fw_error, fw_error_to_str(fw_error));
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) {
|
||||
+ error_report("SEV-SNP: expected update of GPA range %lx-%lx, "
|
||||
+ "got GPA range %lx-%llx",
|
||||
+ data->gpa, data->gpa + data->len, data->gpa,
|
||||
+ update.gfn_start << TARGET_PAGE_BITS);
|
||||
+ ret = -EIO;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
|
||||
{
|
||||
@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
|
||||
}
|
||||
|
||||
+static void
|
||||
+sev_snp_launch_finish(SevCommonState *sev_common)
|
||||
+{
|
||||
+ int ret, error;
|
||||
+ Error *local_err = NULL;
|
||||
+ SevLaunchUpdateData *data;
|
||||
+ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
|
||||
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
|
||||
+
|
||||
+ QTAILQ_FOREACH(data, &launch_update, next) {
|
||||
+ ret = sev_snp_launch_update(sev_snp, data);
|
||||
+ if (ret) {
|
||||
+ exit(1);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth,
|
||||
+ sev_snp->host_data);
|
||||
+ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH,
|
||||
+ finish, &error);
|
||||
+ if (ret) {
|
||||
+ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'",
|
||||
+ ret, error, fw_error_to_str(error));
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ sev_set_guest_state(sev_common, SEV_STATE_RUNNING);
|
||||
+
|
||||
+ /* add migration blocker */
|
||||
+ error_setg(&sev_mig_blocker,
|
||||
+ "SEV-SNP: Migration is not implemented");
|
||||
+ ret = migrate_add_blocker(&sev_mig_blocker, &local_err);
|
||||
+ if (local_err) {
|
||||
+ error_report_err(local_err);
|
||||
+ error_free(sev_mig_blocker);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
static void
|
||||
sev_vm_state_change(void *opaque, bool running, RunState state)
|
||||
{
|
||||
@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->launch_start = sev_snp_launch_start;
|
||||
+ klass->launch_finish = sev_snp_launch_finish;
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
-
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
diff --git a/target/i386/trace-events b/target/i386/trace-events
|
||||
index cb26d8a925..06b44ead2e 100644
|
||||
--- a/target/i386/trace-events
|
||||
+++ b/target/i386/trace-events
|
||||
@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) ""
|
||||
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
|
||||
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
|
||||
kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
|
||||
+kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)"
|
||||
+kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Apr 2024 18:07:41 -0500
|
||||
Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV
|
||||
guest objects
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
QEMU will currently automatically make use of the KVM_SEV_INIT2 API for
|
||||
initializing SEV and SEV-ES guests versus the older
|
||||
KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces.
|
||||
|
||||
However, the older interfaces will silently avoid sync'ing FPU/XSAVE
|
||||
state to the VMSA prior to encryption, thus relying on behavior and
|
||||
measurements that assume the related fields to be all zero.
|
||||
|
||||
With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in
|
||||
measurement changes and, theoretically, behavioral changes, though the
|
||||
latter are unlikely to be seen in practice.
|
||||
|
||||
To allow a smooth transition to the newer interface, while still
|
||||
providing a mechanism to maintain backward compatibility with VMs
|
||||
created using the older interfaces, provide a new command-line
|
||||
parameter:
|
||||
|
||||
-object sev-guest,legacy-vm-type=true,...
|
||||
|
||||
and have it default to false.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Message-ID: <20240409230743.962513-2-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
qapi/qom.json | 11 ++++++++++-
|
||||
target/i386/sev.c | 18 +++++++++++++++++-
|
||||
2 files changed, 27 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index 85e6b4f84a..38dde6d785 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -898,6 +898,14 @@
|
||||
# designated guest firmware page for measured boot with -kernel
|
||||
# (default: false) (since 6.2)
|
||||
#
|
||||
+# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
|
||||
+# The newer KVM_SEV_INIT2 interface syncs additional vCPU
|
||||
+# state when initializing the VMSA structures, which will
|
||||
+# result in a different guest measurement. Set this to
|
||||
+# maintain compatibility with older QEMU or kernel versions
|
||||
+# that rely on legacy KVM_SEV_INIT behavior.
|
||||
+# (default: false) (since 9.1)
|
||||
+#
|
||||
# Since: 2.12
|
||||
##
|
||||
{ 'struct': 'SevGuestProperties',
|
||||
@@ -908,7 +916,8 @@
|
||||
'*handle': 'uint32',
|
||||
'*cbitpos': 'uint32',
|
||||
'reduced-phys-bits': 'uint32',
|
||||
- '*kernel-hashes': 'bool' } }
|
||||
+ '*kernel-hashes': 'bool',
|
||||
+ '*legacy-vm-type': 'bool' } }
|
||||
|
||||
##
|
||||
# @ThreadContextProperties:
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 9dab4060b8..f4ee317cb0 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -67,6 +67,7 @@ struct SevGuestState {
|
||||
uint32_t cbitpos;
|
||||
uint32_t reduced_phys_bits;
|
||||
bool kernel_hashes;
|
||||
+ bool legacy_vm_type;
|
||||
|
||||
/* runtime state */
|
||||
uint32_t handle;
|
||||
@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
|
||||
sev->kernel_hashes = value;
|
||||
}
|
||||
|
||||
+static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
|
||||
+{
|
||||
+ return SEV_GUEST(obj)->legacy_vm_type;
|
||||
+}
|
||||
+
|
||||
+static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ SEV_GUEST(obj)->legacy_vm_type = value;
|
||||
+}
|
||||
+
|
||||
bool
|
||||
sev_enabled(void)
|
||||
{
|
||||
@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
}
|
||||
|
||||
kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
|
||||
- if (kvm_is_vm_type_supported(kvm_type)) {
|
||||
+ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) {
|
||||
sev->kvm_type = kvm_type;
|
||||
} else {
|
||||
sev->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
sev_guest_set_kernel_hashes);
|
||||
object_class_property_set_description(oc, "kernel-hashes",
|
||||
"add kernel hashes to guest firmware for measured Linux boot");
|
||||
+ object_class_property_add_bool(oc, "legacy-vm-type",
|
||||
+ sev_guest_get_legacy_vm_type,
|
||||
+ sev_guest_set_legacy_vm_type);
|
||||
+ object_class_property_set_description(oc, "legacy-vm-type",
|
||||
+ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
|
||||
}
|
||||
|
||||
static void
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,203 @@
|
||||
From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001
|
||||
From: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:21 -0500
|
||||
Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Some aspects of the SEV init routine are specific to SEV and not
|
||||
applicable for SNP guests, so move the SEV-specific bits into
|
||||
a separate class method and retain only the common functionality.
|
||||
|
||||
Co-developed-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 51 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 4edfedc139..5519de1c6b 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -73,6 +73,7 @@ struct SevCommonStateClass {
|
||||
/* public */
|
||||
int (*launch_start)(SevCommonState *sev_common);
|
||||
void (*launch_finish)(SevCommonState *sev_common);
|
||||
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -882,7 +883,7 @@ out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
|
||||
-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
char *devname;
|
||||
@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
struct sev_user_data_status status = {};
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
|
||||
- ret = ram_block_discard_disable(true);
|
||||
- if (ret) {
|
||||
- error_report("%s: cannot disable RAM discard", __func__);
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (host_cbitpos != sev_common->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev_common->cbitpos);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
" it should be in the range of 1 to 63, requested '%d'",
|
||||
__func__, sev_common->reduced_phys_bits);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL);
|
||||
@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: Failed to open %s '%s'", __func__,
|
||||
devname, strerror(errno));
|
||||
g_free(devname);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
g_free(devname);
|
||||
|
||||
@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: failed to get platform status ret=%d "
|
||||
"fw_error='%d: %s'", __func__, ret, fw_error,
|
||||
fw_error_to_str(fw_error));
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
sev_common->build_id = status.build;
|
||||
sev_common->api_major = status.api_major;
|
||||
@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (!kvm_kernel_irqchip_allowed()) {
|
||||
error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip"
|
||||
"support", __func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: guest policy requires SEV-ES, but "
|
||||
"host SEV-ES support unavailable",
|
||||
__func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
|
||||
__func__, ret, fw_error, fw_error_to_str(fw_error));
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
ret = klass->launch_start(sev_common);
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to create encryption context", __func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (klass->kvm_init && klass->kvm_init(cgs, errp)) {
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
- ram_block_notifier_add(&sev_ram_notifier);
|
||||
- qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
|
||||
qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common);
|
||||
|
||||
cgs->ready = true;
|
||||
|
||||
return 0;
|
||||
-err:
|
||||
- ram_block_discard_disable(false);
|
||||
- return -1;
|
||||
+}
|
||||
+
|
||||
+static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ /*
|
||||
+ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding
|
||||
+ * isn't actually possible. With SNP, only guest_memfd pages are used
|
||||
+ * for private guest memory, so discarding of shared memory is still
|
||||
+ * possible..
|
||||
+ */
|
||||
+ ret = ram_block_discard_disable(true);
|
||||
+ if (ret) {
|
||||
+ error_setg(errp, "%s: cannot disable RAM discard", __func__);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * SEV uses these notifiers to register/pin pages prior to guest use,
|
||||
+ * but SNP relies on guest_memfd for private pages, which has its
|
||||
+ * own internal mechanisms for registering/pinning private memory.
|
||||
+ */
|
||||
+ ram_block_notifier_add(&sev_ram_notifier);
|
||||
+
|
||||
+ /*
|
||||
+ * The machine done notify event is used for SEV guests to get the
|
||||
+ * measurement of the encrypted images. When SEV-SNP is enabled, the
|
||||
+ * measurement is part of the guest attestation process where it can
|
||||
+ * be collected without any reliance on the VMM. So skip registering
|
||||
+ * the notifier for SNP in favor of using guest attestation instead.
|
||||
+ */
|
||||
+ qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data)
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
- klass->kvm_init = sev_kvm_init;
|
||||
+ klass->kvm_init = sev_common_kvm_init;
|
||||
x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
+ klass->kvm_init = sev_kvm_init;
|
||||
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,94 @@
|
||||
From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001
|
||||
From: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:22 -0500
|
||||
Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SNP does not support SMM and requires guest_memfd for
|
||||
private guest memory, so add SNP specific kvm_init()
|
||||
functionality in snp_kvm_init() class method.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 5519de1c6b..6525b3c1a0 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -885,12 +885,12 @@ out:
|
||||
|
||||
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
- SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
char *devname;
|
||||
int ret, fw_error, cmd;
|
||||
uint32_t ebx;
|
||||
uint32_t host_cbitpos;
|
||||
struct sev_user_data_status status = {};
|
||||
+ SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
+
|
||||
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
|
||||
+ x86ms->smm = ON_OFF_AUTO_OFF;
|
||||
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
|
||||
+ error_setg(errp, "SEV-SNP does not support SMM.");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int
|
||||
sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
|
||||
static void
|
||||
sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
+ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+
|
||||
+ klass->kvm_init = sev_snp_kvm_init;
|
||||
+
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
static void
|
||||
sev_snp_guest_instance_init(Object *obj)
|
||||
{
|
||||
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
|
||||
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
|
||||
|
||||
+ cgs->require_guest_memfd = true;
|
||||
+
|
||||
/* default init/start/finish params for kvm */
|
||||
sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,262 @@
|
||||
From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:32 -0500
|
||||
Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SEV-SNP firmware allows a special guest page to be populated with a
|
||||
table of guest CPUID values so that they can be validated through
|
||||
firmware before being loaded into encrypted guest memory where they can
|
||||
be used in place of hypervisor-provided values[1].
|
||||
|
||||
As part of SEV-SNP guest initialization, use this interface to validate
|
||||
the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest
|
||||
start and populate the CPUID page reserved by OVMF with the resulting
|
||||
encrypted data.
|
||||
|
||||
[1] SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 162 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index c57534fca2..06401f0526 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = {
|
||||
|
||||
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
|
||||
|
||||
+/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
|
||||
+#define KVM_MAX_CPUID_ENTRIES 100
|
||||
+
|
||||
+typedef struct KvmCpuidInfo {
|
||||
+ struct kvm_cpuid2 cpuid;
|
||||
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
+} KvmCpuidInfo;
|
||||
+
|
||||
+#define SNP_CPUID_FUNCTION_MAXCOUNT 64
|
||||
+#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
|
||||
+
|
||||
+typedef struct {
|
||||
+ uint32_t eax_in;
|
||||
+ uint32_t ecx_in;
|
||||
+ uint64_t xcr0_in;
|
||||
+ uint64_t xss_in;
|
||||
+ uint32_t eax;
|
||||
+ uint32_t ebx;
|
||||
+ uint32_t ecx;
|
||||
+ uint32_t edx;
|
||||
+ uint64_t reserved;
|
||||
+} __attribute__((packed)) SnpCpuidFunc;
|
||||
+
|
||||
+typedef struct {
|
||||
+ uint32_t count;
|
||||
+ uint32_t reserved1;
|
||||
+ uint64_t reserved2;
|
||||
+ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT];
|
||||
+} __attribute__((packed)) SnpCpuidInfo;
|
||||
+
|
||||
static int
|
||||
sev_ioctl(int fd, int cmd, void *data, int *error)
|
||||
{
|
||||
@@ -788,6 +818,35 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static void
|
||||
+sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
|
||||
+ SnpCpuidInfo *new)
|
||||
+{
|
||||
+ size_t i;
|
||||
+
|
||||
+ if (old->count != new->count) {
|
||||
+ error_report("SEV-SNP: CPUID validation failed due to count mismatch,"
|
||||
+ "provided: %d, expected: %d", old->count, new->count);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < old->count; i++) {
|
||||
+ SnpCpuidFunc *old_func, *new_func;
|
||||
+
|
||||
+ old_func = &old->entries[i];
|
||||
+ new_func = &new->entries[i];
|
||||
+
|
||||
+ if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
|
||||
+ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x"
|
||||
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x"
|
||||
+ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x",
|
||||
+ old_func->eax_in, old_func->ecx_in,
|
||||
+ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx,
|
||||
+ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static const char *
|
||||
snp_page_type_to_str(int type)
|
||||
{
|
||||
@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
SevLaunchUpdateData *data)
|
||||
{
|
||||
int ret, fw_error;
|
||||
+ SnpCpuidInfo snp_cpuid_info;
|
||||
struct kvm_sev_snp_launch_update update = {0};
|
||||
|
||||
if (!data->hva || !data->len) {
|
||||
@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
return 1;
|
||||
}
|
||||
|
||||
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */
|
||||
+ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info));
|
||||
+ }
|
||||
+
|
||||
update.uaddr = (__u64)(unsigned long)data->hva;
|
||||
update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
|
||||
update.len = data->len;
|
||||
@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
if (ret && ret != -EAGAIN) {
|
||||
error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
|
||||
ret, fw_error, fw_error_to_str(fw_error));
|
||||
+
|
||||
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva);
|
||||
+ error_report("SEV-SNP: failed update CPUID page");
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
}
|
||||
|
||||
static int
|
||||
-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
+snp_launch_update_data(uint64_t gpa, void *hva,
|
||||
+ uint32_t len, int type)
|
||||
{
|
||||
SevLaunchUpdateData *data;
|
||||
|
||||
@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
|
||||
+ const KvmCpuidInfo *kvm_cpuid_info)
|
||||
+{
|
||||
+ size_t i;
|
||||
+
|
||||
+ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) {
|
||||
+ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)",
|
||||
+ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info));
|
||||
+
|
||||
+ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) {
|
||||
+ const struct kvm_cpuid_entry2 *kvm_cpuid_entry;
|
||||
+ SnpCpuidFunc *snp_cpuid_entry;
|
||||
+
|
||||
+ kvm_cpuid_entry = &kvm_cpuid_info->entries[i];
|
||||
+ snp_cpuid_entry = &snp_cpuid_info->entries[i];
|
||||
+
|
||||
+ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function;
|
||||
+ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) {
|
||||
+ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index;
|
||||
+ }
|
||||
+ snp_cpuid_entry->eax = kvm_cpuid_entry->eax;
|
||||
+ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx;
|
||||
+ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx;
|
||||
+ snp_cpuid_entry->edx = kvm_cpuid_entry->edx;
|
||||
+
|
||||
+ /*
|
||||
+ * Guest kernels will calculate EBX themselves using the 0xD
|
||||
+ * subfunctions corresponding to the individual XSAVE areas, so only
|
||||
+ * encode the base XSAVE size in the initial leaves, corresponding
|
||||
+ * to the initial XCR0=1 state.
|
||||
+ */
|
||||
+ if (snp_cpuid_entry->eax_in == 0xD &&
|
||||
+ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) {
|
||||
+ snp_cpuid_entry->ebx = 0x240;
|
||||
+ snp_cpuid_entry->xcr0_in = 1;
|
||||
+ snp_cpuid_entry->xss_in = 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ snp_cpuid_info->count = i;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
|
||||
+{
|
||||
+ KvmCpuidInfo kvm_cpuid_info = {0};
|
||||
+ SnpCpuidInfo snp_cpuid_info;
|
||||
+ CPUState *cs = first_cpu;
|
||||
+ int ret;
|
||||
+ uint32_t i = 0;
|
||||
+
|
||||
+ assert(sizeof(snp_cpuid_info) <= cpuid_len);
|
||||
+
|
||||
+ /* get the cpuid list from KVM */
|
||||
+ do {
|
||||
+ kvm_cpuid_info.cpuid.nent = ++i;
|
||||
+ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info);
|
||||
+ } while (ret == -E2BIG);
|
||||
+
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'",
|
||||
+ strerror(-ret));
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info);
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: failed to generate CPUID table information");
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info));
|
||||
+
|
||||
+ return snp_launch_update_data(cpuid_addr, hva, cpuid_len,
|
||||
+ KVM_SEV_SNP_PAGE_TYPE_CPUID);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
snp_metadata_desc_to_page_type(int desc_type)
|
||||
{
|
||||
@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
- ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
|
||||
+ } else {
|
||||
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ }
|
||||
+
|
||||
if (ret) {
|
||||
error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
|
||||
__func__, desc->base, desc->len, desc->type);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:31 -0500
|
||||
Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata
|
||||
pages
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
OVMF reserves various pages so they can be pre-initialized/validated
|
||||
prior to launching the guest. Add support for populating these pages
|
||||
with the expected content.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 74 insertions(+)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 17281bb2c7..c57534fca2 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
|
||||
}
|
||||
|
||||
+static int
|
||||
+snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
+{
|
||||
+ SevLaunchUpdateData *data;
|
||||
+
|
||||
+ data = g_new0(SevLaunchUpdateData, 1);
|
||||
+ data->gpa = gpa;
|
||||
+ data->hva = hva;
|
||||
+ data->len = len;
|
||||
+ data->type = type;
|
||||
+
|
||||
+ QTAILQ_INSERT_TAIL(&launch_update, data, next);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+snp_metadata_desc_to_page_type(int desc_type)
|
||||
+{
|
||||
+ switch (desc_type) {
|
||||
+ /* Add the unmeasured prevalidated pages as a zero page */
|
||||
+ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS;
|
||||
+ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID;
|
||||
+ default:
|
||||
+ return KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
+ OvmfSevMetadata *metadata)
|
||||
+{
|
||||
+ OvmfSevMetadataDesc *desc;
|
||||
+ int type, ret, i;
|
||||
+ void *hva;
|
||||
+ MemoryRegion *mr = NULL;
|
||||
+
|
||||
+ for (i = 0; i < metadata->num_desc; i++) {
|
||||
+ desc = &metadata->descs[i];
|
||||
+
|
||||
+ type = snp_metadata_desc_to_page_type(desc->type);
|
||||
+
|
||||
+ hva = gpa2hva(&mr, desc->base, desc->len, NULL);
|
||||
+ if (!hva) {
|
||||
+ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x",
|
||||
+ __func__, desc->base, desc->len);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
|
||||
+ __func__, desc->base, desc->len, desc->type);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void
|
||||
sev_snp_launch_finish(SevCommonState *sev_common)
|
||||
{
|
||||
int ret, error;
|
||||
Error *local_err = NULL;
|
||||
+ OvmfSevMetadata *metadata;
|
||||
SevLaunchUpdateData *data;
|
||||
SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
|
||||
struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
|
||||
|
||||
+ /*
|
||||
+ * To boot the SNP guest, the hypervisor is required to populate the CPUID
|
||||
+ * and Secrets page before finalizing the launch flow. The location of
|
||||
+ * the secrets and CPUID page is available through the OVMF metadata GUID.
|
||||
+ */
|
||||
+ metadata = pc_system_get_ovmf_sev_metadata_ptr();
|
||||
+ if (metadata == NULL) {
|
||||
+ error_report("%s: Failed to locate SEV metadata header", __func__);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ /* Populate all the metadata pages */
|
||||
+ snp_populate_metadata_pages(sev_snp, metadata);
|
||||
+
|
||||
QTAILQ_FOREACH(data, &launch_update, next) {
|
||||
ret = sev_snp_launch_update(sev_snp, data);
|
||||
if (ret) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,122 @@
|
||||
From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:27 -0500
|
||||
Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The SNP_LAUNCH_START is called first to create a cryptographic launch
|
||||
context within the firmware.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++
|
||||
target/i386/trace-events | 1 +
|
||||
2 files changed, 40 insertions(+)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 43d1c48bd9..e89b87d2f5 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "confidential-guest.h"
|
||||
#include "hw/i386/pc.h"
|
||||
#include "exec/address-spaces.h"
|
||||
+#include "qemu/queue.h"
|
||||
|
||||
OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
|
||||
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
|
||||
@@ -115,6 +116,16 @@ struct SevSnpGuestState {
|
||||
#define DEFAULT_SEV_DEVICE "/dev/sev"
|
||||
#define DEFAULT_SEV_SNP_POLICY 0x30000
|
||||
|
||||
+typedef struct SevLaunchUpdateData {
|
||||
+ QTAILQ_ENTRY(SevLaunchUpdateData) next;
|
||||
+ hwaddr gpa;
|
||||
+ void *hva;
|
||||
+ uint64_t len;
|
||||
+ int type;
|
||||
+} SevLaunchUpdateData;
|
||||
+
|
||||
+static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update;
|
||||
+
|
||||
#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
|
||||
typedef struct __attribute__((__packed__)) SevInfoBlock {
|
||||
/* SEV-ES Reset Vector Address */
|
||||
@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+sev_snp_launch_start(SevCommonState *sev_common)
|
||||
+{
|
||||
+ int fw_error, rc;
|
||||
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
|
||||
+ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf;
|
||||
+
|
||||
+ trace_kvm_sev_snp_launch_start(start->policy,
|
||||
+ sev_snp_guest->guest_visible_workarounds);
|
||||
+
|
||||
+ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START,
|
||||
+ start, &fw_error);
|
||||
+ if (rc < 0) {
|
||||
+ error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'",
|
||||
+ __func__, rc, fw_error, fw_error_to_str(fw_error));
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ QTAILQ_INIT(&launch_update);
|
||||
+
|
||||
+ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
sev_launch_start(SevCommonState *sev_common)
|
||||
{
|
||||
@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
ret = klass->launch_start(sev_common);
|
||||
+
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to create encryption context", __func__);
|
||||
return -1;
|
||||
@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->launch_start = sev_snp_launch_start;
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
+
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
diff --git a/target/i386/trace-events b/target/i386/trace-events
|
||||
index 2cd8726eeb..cb26d8a925 100644
|
||||
--- a/target/i386/trace-events
|
||||
+++ b/target/i386/trace-events
|
||||
@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s"
|
||||
kvm_sev_launch_finish(void) ""
|
||||
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
|
||||
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
|
||||
+kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,237 @@
|
||||
From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001
|
||||
From: Dov Murik <dovmurik@linux.ibm.com>
|
||||
Date: Thu, 30 May 2024 06:16:35 -0500
|
||||
Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
In SNP, the hashes page is designated with a specific metadata entry
|
||||
published in AmdSev OVMF.
|
||||
|
||||
Therefore, if the user enabled kernel hashes (for measured direct boot),
|
||||
QEMU should prepare the content of hashes table, and during the
|
||||
processing of the metadata entry it copies the content into the designated
|
||||
page and encrypt it.
|
||||
|
||||
Note that in SNP (unlike SEV and SEV-ES) the measurement is done in
|
||||
whole 4KB pages. Therefore QEMU zeros the whole page that includes the
|
||||
hashes table, and fills in the kernel hashes area in that page, and then
|
||||
encrypts the whole page. The rest of the page is reserved for SEV
|
||||
launch secrets which are not usable anyway on SNP.
|
||||
|
||||
If the user disabled kernel hashes, QEMU pre-validates the kernel hashes
|
||||
page as a zero page.
|
||||
|
||||
Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
include/hw/i386/pc.h | 2 +
|
||||
target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++-----------
|
||||
2 files changed, 85 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 94b49310f5..ee3bfb7be9 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -175,6 +175,8 @@ typedef enum {
|
||||
SEV_DESC_TYPE_SNP_SECRETS,
|
||||
/* The section contains address that can be used as a CPUID page */
|
||||
SEV_DESC_TYPE_CPUID,
|
||||
+ /* The section contains the region for kernel hashes for measured direct boot */
|
||||
+ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10,
|
||||
|
||||
} ovmf_sev_metadata_desc_type;
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 3fce4c08eb..004c667ac1 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -115,6 +115,10 @@ struct SevCommonStateClass {
|
||||
X86ConfidentialGuestClass parent_class;
|
||||
|
||||
/* public */
|
||||
+ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp);
|
||||
int (*launch_start)(SevCommonState *sev_common);
|
||||
void (*launch_finish)(SevCommonState *sev_common);
|
||||
int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len);
|
||||
@@ -154,6 +158,9 @@ struct SevSnpGuestState {
|
||||
|
||||
struct kvm_sev_snp_launch_start kvm_start_conf;
|
||||
struct kvm_sev_snp_launch_finish kvm_finish_conf;
|
||||
+
|
||||
+ uint32_t kernel_hashes_offset;
|
||||
+ PaddedSevHashTable *kernel_hashes_data;
|
||||
};
|
||||
|
||||
#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */
|
||||
@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
|
||||
KVM_SEV_SNP_PAGE_TYPE_CPUID);
|
||||
}
|
||||
|
||||
+static int
|
||||
+snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr,
|
||||
+ void *hva, uint32_t len)
|
||||
+{
|
||||
+ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ if (sev_snp->parent_obj.kernel_hashes) {
|
||||
+ assert(sev_snp->kernel_hashes_data);
|
||||
+ assert((sev_snp->kernel_hashes_offset +
|
||||
+ sizeof(*sev_snp->kernel_hashes_data)) <= len);
|
||||
+ memset(hva, 0, len);
|
||||
+ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data,
|
||||
+ sizeof(*sev_snp->kernel_hashes_data));
|
||||
+ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL;
|
||||
+ }
|
||||
+ return snp_launch_update_data(addr, hva, len, type);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
snp_metadata_desc_to_page_type(int desc_type)
|
||||
{
|
||||
@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
|
||||
if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
|
||||
+ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) {
|
||||
+ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva,
|
||||
+ desc->len);
|
||||
} else {
|
||||
ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
}
|
||||
@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht,
|
||||
return true;
|
||||
}
|
||||
|
||||
+static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ /*
|
||||
+ * SNP: Populate the hashes table in an area that later in
|
||||
+ * snp_launch_update_kernel_hashes() will be copied to the guest memory
|
||||
+ * and encrypted.
|
||||
+ */
|
||||
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
|
||||
+ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK;
|
||||
+ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1);
|
||||
+ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp);
|
||||
+}
|
||||
+
|
||||
+static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ PaddedSevHashTable *padded_ht;
|
||||
+ hwaddr mapped_len = sizeof(*padded_ht);
|
||||
+ MemTxAttrs attrs = { 0 };
|
||||
+ bool ret = true;
|
||||
+
|
||||
+ /*
|
||||
+ * Populate the hashes table in the guest's memory at the OVMF-designated
|
||||
+ * area for the SEV hashes table
|
||||
+ */
|
||||
+ padded_ht = address_space_map(&address_space_memory, area->base,
|
||||
+ &mapped_len, true, attrs);
|
||||
+ if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
|
||||
+ error_setg(errp, "SEV: cannot map hashes table guest memory area");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
|
||||
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
+ sizeof(*padded_ht), errp) < 0) {
|
||||
+ ret = false;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = false;
|
||||
+ }
|
||||
+
|
||||
+ address_space_unmap(&address_space_memory, padded_ht,
|
||||
+ mapped_len, true, mapped_len);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
|
||||
* which is included in SEV's initial memory measurement.
|
||||
@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
{
|
||||
uint8_t *data;
|
||||
SevHashTableDescriptor *area;
|
||||
- PaddedSevHashTable *padded_ht;
|
||||
- hwaddr mapped_len = sizeof(*padded_ht);
|
||||
- MemTxAttrs attrs = { 0 };
|
||||
- bool ret = true;
|
||||
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
|
||||
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
|
||||
|
||||
/*
|
||||
* Only add the kernel hashes if the sev-guest configuration explicitly
|
||||
@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
return false;
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Populate the hashes table in the guest's memory at the OVMF-designated
|
||||
- * area for the SEV hashes table
|
||||
- */
|
||||
- padded_ht = address_space_map(&address_space_memory, area->base,
|
||||
- &mapped_len, true, attrs);
|
||||
- if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
|
||||
- error_setg(errp, "SEV: cannot map hashes table guest memory area");
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
|
||||
- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
- sizeof(*padded_ht), errp) < 0) {
|
||||
- ret = false;
|
||||
- }
|
||||
- } else {
|
||||
- ret = false;
|
||||
- }
|
||||
-
|
||||
- address_space_unmap(&address_space_memory, padded_ht,
|
||||
- mapped_len, true, mapped_len);
|
||||
-
|
||||
- return ret;
|
||||
+ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp);
|
||||
}
|
||||
|
||||
static char *
|
||||
@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes;
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
klass->launch_update_data = sev_launch_update_data;
|
||||
@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes;
|
||||
klass->launch_start = sev_snp_launch_start;
|
||||
klass->launch_finish = sev_snp_launch_finish;
|
||||
klass->launch_update_data = sev_snp_launch_update_data;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,268 @@
|
||||
From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Jul 2024 23:10:05 -0500
|
||||
Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy
|
||||
KVM_SEV*_INIT
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Currently if the 'legacy-vm-type' property of the sev-guest object is
|
||||
'off', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
|
||||
interface in conjunction with the newer KVM_X86_SEV_VM and
|
||||
KVM_X86_SEV_ES_VM KVM VM types.
|
||||
|
||||
This can lead to measurement changes if, for instance, an SEV guest was
|
||||
created on a host that originally had an older kernel that didn't
|
||||
support KVM_SEV_INIT2, but is booted on the same host later on after the
|
||||
host kernel was upgraded.
|
||||
|
||||
Instead, if legacy-vm-type is 'off', QEMU should fail if the
|
||||
KVM_SEV_INIT2 interface is not provided by the current host kernel.
|
||||
Modify the fallback handling accordingly.
|
||||
|
||||
In the future, VMSA features and other flags might be added to QEMU
|
||||
which will require legacy-vm-type to be 'off' because they will rely
|
||||
on the newer KVM_SEV_INIT2 interface. It may be difficult to convey to
|
||||
users what values of legacy-vm-type are compatible with which
|
||||
features/options, so as part of this rework, switch legacy-vm-type to a
|
||||
tri-state OnOffAuto option. 'auto' in this case will automatically
|
||||
switch to using the newer KVM_SEV_INIT2, but only if it is required to
|
||||
make use of new VMSA features or other options only available via
|
||||
KVM_SEV_INIT2.
|
||||
|
||||
Defining 'auto' in this way would avoid inadvertently breaking
|
||||
compatibility with older kernels since it would only be used in cases
|
||||
where users opt into newer features that are only available via
|
||||
KVM_SEV_INIT2 and newer kernels, and provide better default behavior
|
||||
than the legacy-vm-type=off behavior that was previously in place, so
|
||||
make it the default for 9.1+ machine types.
|
||||
|
||||
Cc: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
cc: kvm@vger.kernel.org
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd)
|
||||
|
||||
RHEL: adjust compatibility setting, applying it to 9.4 machine type
|
||||
---
|
||||
hw/i386/pc.c | 2 +-
|
||||
qapi/qom.json | 18 ++++++----
|
||||
target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++--------
|
||||
3 files changed, 83 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index b25d075b59..e9c5ea5d8f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
GlobalProperty pc_rhel_9_5_compat[] = {
|
||||
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
|
||||
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
|
||||
- { "sev-guest", "legacy-vm-type", "true" },
|
||||
+ { "sev-guest", "legacy-vm-type", "on" },
|
||||
};
|
||||
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
|
||||
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index 8bd299265e..17bd5a0cf7 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -912,12 +912,16 @@
|
||||
# @handle: SEV firmware handle (default: 0)
|
||||
#
|
||||
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
|
||||
-# The newer KVM_SEV_INIT2 interface syncs additional vCPU
|
||||
-# state when initializing the VMSA structures, which will
|
||||
-# result in a different guest measurement. Set this to
|
||||
-# maintain compatibility with older QEMU or kernel versions
|
||||
-# that rely on legacy KVM_SEV_INIT behavior.
|
||||
-# (default: false) (since 9.1)
|
||||
+# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs
|
||||
+# additional vCPU state when initializing the VMSA structures,
|
||||
+# which will result in a different guest measurement. Set
|
||||
+# this to 'on' to force compatibility with older QEMU or kernel
|
||||
+# versions that rely on legacy KVM_SEV_INIT behavior. 'auto'
|
||||
+# will behave identically to 'on', but will automatically
|
||||
+# switch to using KVM_SEV_INIT2 if the user specifies any
|
||||
+# additional options that require it. If set to 'off', QEMU
|
||||
+# will require KVM_SEV_INIT2 unconditionally.
|
||||
+# (default: auto) (since 9.1)
|
||||
#
|
||||
# Since: 2.12
|
||||
##
|
||||
@@ -927,7 +931,7 @@
|
||||
'*session-file': 'str',
|
||||
'*policy': 'uint32',
|
||||
'*handle': 'uint32',
|
||||
- '*legacy-vm-type': 'bool' } }
|
||||
+ '*legacy-vm-type': 'OnOffAuto' } }
|
||||
|
||||
##
|
||||
# @SevSnpGuestProperties:
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 491fab74fd..b921defb63 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -144,7 +144,7 @@ struct SevGuestState {
|
||||
uint32_t policy;
|
||||
char *dh_cert_file;
|
||||
char *session_file;
|
||||
- bool legacy_vm_type;
|
||||
+ OnOffAuto legacy_vm_type;
|
||||
};
|
||||
|
||||
struct SevSnpGuestState {
|
||||
@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * This helper is to examine sev-guest properties and determine if any options
|
||||
+ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated
|
||||
+ * KVM VM types.
|
||||
+ */
|
||||
+static bool sev_init2_required(SevGuestState *sev_guest)
|
||||
+{
|
||||
+ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(cg);
|
||||
@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ /* These are the only cases where legacy VM types can be used. */
|
||||
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON ||
|
||||
+ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO &&
|
||||
+ !sev_init2_required(sev_guest))) {
|
||||
+ sev_common->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Newer VM types are required, either explicitly via legacy-vm-type=off, or
|
||||
+ * implicitly via legacy-vm-type=auto along with additional sev-guest
|
||||
+ * properties that require the newer VM types.
|
||||
+ */
|
||||
kvm_type = (sev_guest->policy & SEV_POLICY_ES) ?
|
||||
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
|
||||
- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) {
|
||||
- sev_common->kvm_type = kvm_type;
|
||||
- } else {
|
||||
- sev_common->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
+ if (!kvm_is_vm_type_supported(kvm_type)) {
|
||||
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) {
|
||||
+ error_report("SEV: host kernel does not support requested %s VM type, which is required "
|
||||
+ "for the set of options specified. To allow use of the legacy "
|
||||
+ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not "
|
||||
+ "compatible with the legacy VM type, or upgrade your kernel.",
|
||||
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
|
||||
+ } else {
|
||||
+ error_report("SEV: host kernel does not support requested %s VM type. To allow use of "
|
||||
+ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument "
|
||||
+ "must be set to 'on' or 'auto' for the sev-guest object.",
|
||||
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
|
||||
+ }
|
||||
+
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
+ sev_common->kvm_type = kvm_type;
|
||||
out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
trace_kvm_sev_init();
|
||||
- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
+ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) {
|
||||
+ case KVM_X86_DEFAULT_VM:
|
||||
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
|
||||
- } else {
|
||||
+ break;
|
||||
+ case KVM_X86_SEV_VM:
|
||||
+ case KVM_X86_SEV_ES_VM:
|
||||
+ case KVM_X86_SNP_VM: {
|
||||
struct kvm_sev_init args = { 0 };
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
|
||||
+ break;
|
||||
+ }
|
||||
+ default:
|
||||
+ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.",
|
||||
+ __func__);
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp)
|
||||
SEV_GUEST(obj)->session_file = g_strdup(value);
|
||||
}
|
||||
|
||||
-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
|
||||
+static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque,
|
||||
+ Error **errp)
|
||||
{
|
||||
- return SEV_GUEST(obj)->legacy_vm_type;
|
||||
+ SevGuestState *sev_guest = SEV_GUEST(obj);
|
||||
+ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type;
|
||||
+
|
||||
+ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp);
|
||||
}
|
||||
|
||||
-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
|
||||
+static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque,
|
||||
+ Error **errp)
|
||||
{
|
||||
- SEV_GUEST(obj)->legacy_vm_type = value;
|
||||
+ SevGuestState *sev_guest = SEV_GUEST(obj);
|
||||
+
|
||||
+ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
sev_guest_set_session_file);
|
||||
object_class_property_set_description(oc, "session-file",
|
||||
"guest owners session parameters (encoded with base64)");
|
||||
- object_class_property_add_bool(oc, "legacy-vm-type",
|
||||
- sev_guest_get_legacy_vm_type,
|
||||
- sev_guest_set_legacy_vm_type);
|
||||
+ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto",
|
||||
+ sev_guest_get_legacy_vm_type,
|
||||
+ sev_guest_set_legacy_vm_type, NULL, NULL);
|
||||
object_class_property_set_description(oc, "legacy-vm-type",
|
||||
"use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
|
||||
}
|
||||
@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj)
|
||||
object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_apply_compat_props(obj);
|
||||
+
|
||||
+ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO;
|
||||
}
|
||||
|
||||
/* guest info specific sev/sev-es */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,46 @@
|
||||
From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:24 -0500
|
||||
Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for
|
||||
SEV-SNP guests
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
For SEV-SNP guests, launch measurement is queried from within the guest
|
||||
during attestation, so don't attempt to return it as part of
|
||||
query-sev-launch-measure.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 6525b3c1a0..c3daaf1ad5 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
|
||||
|
||||
static char *sev_get_launch_measurement(void)
|
||||
{
|
||||
- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
|
||||
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
+ SevGuestState *sev_guest =
|
||||
+ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
|
||||
|
||||
if (sev_guest &&
|
||||
SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) {
|
||||
--
|
||||
2.39.3
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue