forked from rpms/qemu-kvm
parent
1e9369d713
commit
daf27afeaa
@ -1 +1 @@
|
||||
SOURCES/qemu-8.2.0.tar.xz
|
||||
SOURCES/qemu-9.0.0.tar.xz
|
||||
|
@ -1 +1 @@
|
||||
1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz
|
||||
6699bb03d6da21159b89668bca01c6c958b95d07 SOURCES/qemu-9.0.0.tar.xz
|
||||
|
@ -0,0 +1,121 @@
|
||||
From 59470e8ab849f22b407f55292e540e16a8cad01a Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 05:34:32 -0400
|
||||
Subject: Add upstream compatibility bits
|
||||
|
||||
Adding new compats structure for changes introduced during rebase to QEMU 9.0.0.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
|
||||
---
|
||||
|
||||
Rebase notes (9.0.0 rc2):
|
||||
- Add aw-bits setting for aarch compat record (overwritten for 9.4 and older)
|
||||
---
|
||||
hw/arm/virt.c | 3 +++
|
||||
hw/core/machine.c | 10 ++++++++++
|
||||
hw/i386/pc_piix.c | 3 ++-
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 22 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 22bc345137..f1af9495c6 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -144,6 +144,8 @@ GlobalProperty arm_rhel_compat[] = {
|
||||
{"virtio-net-pci", "romfile", "" },
|
||||
{"virtio-net-pci-transitional", "romfile", "" },
|
||||
{"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+ /* arm_rhel_compat from arm_virt_compat, added for 9.0.0 rebase */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
|
||||
};
|
||||
const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
|
||||
@@ -3728,6 +3730,7 @@ type_init(rhel_machine_init);
|
||||
|
||||
static void rhel940_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 695cb89a46..0f256d9633 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -302,6 +302,16 @@ const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+GlobalProperty hw_compat_rhel_9_5[] = {
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { "migration", "zero-page-detection", "legacy"},
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
|
||||
+ /* hw_compat_rhel_9_5 from hw_compat_8_2 */
|
||||
+ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5);
|
||||
+
|
||||
GlobalProperty hw_compat_rhel_9_4[] = {
|
||||
/* hw_compat_rhel_9_4 from hw_compat_8_0 */
|
||||
{ TYPE_VIRTIO_NET, "host_uso", "off"},
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index a647262d63..6b260682eb 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -1015,7 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
object_class_property_set_description(oc, "x-south-bridge",
|
||||
"Use a different south bridge than PIIX3");
|
||||
|
||||
-
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
+ hw_compat_rhel_9_5_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_4,
|
||||
hw_compat_rhel_9_4_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_3,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index e872dc7e46..2b54944c0f 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -733,6 +733,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
+ hw_compat_rhel_9_5_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index ff753a29e0..9ad54682c6 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1282,6 +1282,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel940_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 46b8725c41..cca62f906b 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -514,6 +514,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_5[];
|
||||
+extern const size_t hw_compat_rhel_9_5_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_4[];
|
||||
extern const size_t hw_compat_rhel_9_4_len;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001
|
||||
From: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
Date: Thu, 14 Dec 2023 04:42:01 -0500
|
||||
Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-17168
|
||||
|
||||
Adding new machine type to support enabling new features.
|
||||
|
||||
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index c541efee5e..0b17c94ad7 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void)
|
||||
}
|
||||
type_init(rhel_machine_init);
|
||||
|
||||
+static void rhel940_virt_options(MachineClass *mc)
|
||||
+{
|
||||
+}
|
||||
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
+
|
||||
static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ rhel940_virt_options(mc);
|
||||
+
|
||||
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
}
|
||||
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
|
||||
+DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
static void rhel900_virt_options(MachineClass *mc)
|
||||
{
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,30 @@
|
||||
From ba574acacf679850e337ec2d5e7836b8277cf393 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Ott <sebott@redhat.com>
|
||||
Date: Thu, 18 Apr 2024 15:04:28 +0200
|
||||
Subject: x86: rhel 9.4.0 machine type compat fix
|
||||
|
||||
Fix up the compatibility for 9.4.0. Ensure that pc-q35-rhel9.4.0
|
||||
still uses SMBIOS 3.X by default.
|
||||
|
||||
Signed-off-by: Sebastian Ott <sebott@redhat.com>
|
||||
---
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 2b54944c0f..2f11f9af7d 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -734,6 +734,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
|
||||
+ /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */
|
||||
+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
|
||||
+
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
hw_compat_rhel_9_5_len);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:26:19 -0500
|
||||
Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on aarch64 to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
index aec1831199..b0191d3c69 100644
|
||||
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||
@@ -39,3 +39,4 @@ CONFIG_PXB=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,37 +0,0 @@
|
||||
From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:33:17 -0500
|
||||
Subject: [PATCH 067/101] Compile IOMMUFD on s390x
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on s390x to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
index 69a799adbd..24cf6dbd03 100644
|
||||
--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||
@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,37 +0,0 @@
|
||||
From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 11 Jan 2024 12:34:44 -0500
|
||||
Subject: [PATCH 068/101] Compile IOMMUFD on x86_64
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm)
|
||||
|
||||
Upstream: RHEL only
|
||||
|
||||
Compiles the IOMMUFD object on s390x to be able to use
|
||||
the IOMMUFD VFIO backend.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
index ce5be73633..ba41108e0c 100644
|
||||
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||
@@ -108,3 +108,4 @@ CONFIG_SGX=y
|
||||
CONFIG_VHOST_VSOCK=y
|
||||
CONFIG_VHOST_USER_VSOCK=y
|
||||
CONFIG_VHOST_USER_FS=y
|
||||
+CONFIG_IOMMUFD=y
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,128 +0,0 @@
|
||||
From ef212eb3026a2460f6502a76afa3baedeb74d975 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
|
||||
Date: Tue, 6 Aug 2024 14:14:27 +0400
|
||||
Subject: [PATCH 1/5] Fix scanout version with pc-q35-rhel9.4.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-MergeRequest: 383: Fix scanout version with pc-q35-rhel9.4.0
|
||||
RH-Jira: RHEL-53565
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] ed2860cfb933654b0046c1b59f78d2e50146bd6c
|
||||
|
||||
Resolves: https://issues.redhat.com/browse/RHEL-52940
|
||||
|
||||
Introduce hw_compat_rhel_9_4_extra so that pc-q35-rhel9.4.0 has
|
||||
x-scanout-vmstate-version=1
|
||||
|
||||
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 1 +
|
||||
hw/core/machine.c | 13 +++++++++++--
|
||||
hw/i386/pc_piix.c | 2 ++
|
||||
hw/i386/pc_q35.c | 3 +++
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
include/hw/boards.h | 3 +++
|
||||
6 files changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index e4a66affcb..851eff6b28 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3670,6 +3670,7 @@ type_init(rhel_machine_init);
|
||||
|
||||
static void rhel940_virt_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index c8c460c916..fe0a28ef3b 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -64,6 +64,17 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
|
||||
const char *rhel_old_machine_deprecation =
|
||||
"machine types for previous major releases are deprecated";
|
||||
|
||||
+/*
|
||||
+ * rhel_9_4_extra is used for <= 9.4 machines.
|
||||
+ *
|
||||
+ * (for compatibility and historical reasons, rhel_9_4 is used for <= 9.2 too)
|
||||
+ */
|
||||
+GlobalProperty hw_compat_rhel_9_4_extra[] = {
|
||||
+ /* hw_compat_rhel_9_4_extra from hw_compat_8_2 */
|
||||
+ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" },
|
||||
+};
|
||||
+const size_t hw_compat_rhel_9_4_extra_len = G_N_ELEMENTS(hw_compat_rhel_9_4_extra);
|
||||
+
|
||||
GlobalProperty hw_compat_rhel_9_4[] = {
|
||||
/* hw_compat_rhel_9_4 from hw_compat_8_0 */
|
||||
{ TYPE_VIRTIO_NET, "host_uso", "off"},
|
||||
@@ -81,8 +92,6 @@ GlobalProperty hw_compat_rhel_9_4[] = {
|
||||
{ "igb", "x-pcie-flr-init", "off" },
|
||||
/* hw_compat_rhel_9_4 jira RHEL-24045 */
|
||||
{ "virtio-mem", "dynamic-memslots", "off" },
|
||||
- /* hw_compat_rhel_9_4 from hw_compat_8_1 */
|
||||
- { "virtio-gpu-device", "x-scanout-vmstate-version", "1" },
|
||||
};
|
||||
const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4);
|
||||
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 54d1c58bce..c846673d87 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -1031,6 +1031,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
|
||||
"Use a different south bridge than PIIX3");
|
||||
|
||||
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra,
|
||||
+ hw_compat_rhel_9_4_extra_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_4,
|
||||
hw_compat_rhel_9_4_len);
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_3,
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index cd5fb7380e..02bc3d515f 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -731,6 +731,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
+
|
||||
+ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra,
|
||||
+ hw_compat_rhel_9_4_extra_len);
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940,
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 24f4773179..cc6087c37a 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -1277,6 +1277,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine)
|
||||
|
||||
static void ccw_machine_rhel940_class_options(MachineClass *mc)
|
||||
{
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len);
|
||||
}
|
||||
DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true);
|
||||
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index 4edfdb0ddb..e6ae0e61cf 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -505,6 +505,9 @@ extern const size_t hw_compat_2_2_len;
|
||||
extern GlobalProperty hw_compat_2_1[];
|
||||
extern const size_t hw_compat_2_1_len;
|
||||
|
||||
+extern GlobalProperty hw_compat_rhel_9_4_extra[];
|
||||
+extern const size_t hw_compat_rhel_9_4_extra_len;
|
||||
+
|
||||
extern GlobalProperty hw_compat_rhel_9_4[];
|
||||
extern const size_t hw_compat_rhel_9_4_len;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,139 @@
|
||||
From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 20 Mar 2024 03:39:03 -0500
|
||||
Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via
|
||||
MachineState
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add a new member "guest_memfd" to memory backends. When it's set
|
||||
to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm
|
||||
guest_memfd will be allocated during RAMBlock allocation.
|
||||
|
||||
Memory backend's @guest_memfd is wired with @require_guest_memfd
|
||||
field of MachineState. It avoid looking up the machine in phymem.c.
|
||||
|
||||
MachineState::require_guest_memfd is supposed to be set by any VMs
|
||||
that requires KVM guest memfd as private memory, e.g., TDX VM.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Message-ID: <20240320083945.991426-8-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
backends/hostmem-file.c | 1 +
|
||||
backends/hostmem-memfd.c | 1 +
|
||||
backends/hostmem-ram.c | 1 +
|
||||
backends/hostmem.c | 1 +
|
||||
hw/core/machine.c | 5 +++++
|
||||
include/hw/boards.h | 2 ++
|
||||
include/sysemu/hostmem.h | 1 +
|
||||
7 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
|
||||
index ac3e433cbd..3c69db7946 100644
|
||||
--- a/backends/hostmem-file.c
|
||||
+++ b/backends/hostmem-file.c
|
||||
@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
|
||||
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
|
||||
ram_flags |= RAM_NAMED_FILE;
|
||||
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
|
||||
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
|
||||
index 3923ea9364..745ead0034 100644
|
||||
--- a/backends/hostmem-memfd.c
|
||||
+++ b/backends/hostmem-memfd.c
|
||||
@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
}
|
||||
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
|
||||
index d121249f0f..f7d81af783 100644
|
||||
--- a/backends/hostmem-ram.c
|
||||
+++ b/backends/hostmem-ram.c
|
||||
@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
|
||||
name, backend->size,
|
||||
ram_flags, errp);
|
||||
diff --git a/backends/hostmem.c b/backends/hostmem.c
|
||||
index 81a72ce40b..eb9682b4a8 100644
|
||||
--- a/backends/hostmem.c
|
||||
+++ b/backends/hostmem.c
|
||||
@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj)
|
||||
/* TODO: convert access to globals to compat properties */
|
||||
backend->merge = machine_mem_merge(machine);
|
||||
backend->dump = machine_dump_guest_core(machine);
|
||||
+ backend->guest_memfd = machine_require_guest_memfd(machine);
|
||||
backend->reserve = true;
|
||||
backend->prealloc_threads = machine->smp.cpus;
|
||||
}
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 92609aae27..07b994e136 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine)
|
||||
return machine->mem_merge;
|
||||
}
|
||||
|
||||
+bool machine_require_guest_memfd(MachineState *machine)
|
||||
+{
|
||||
+ return machine->require_guest_memfd;
|
||||
+}
|
||||
+
|
||||
static char *cpu_slot_to_string(const CPUArchId *cpu)
|
||||
{
|
||||
GString *s = g_string_new(NULL);
|
||||
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
||||
index cca62f906b..815a1c4b26 100644
|
||||
--- a/include/hw/boards.h
|
||||
+++ b/include/hw/boards.h
|
||||
@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine);
|
||||
int machine_phandle_start(MachineState *machine);
|
||||
bool machine_dump_guest_core(MachineState *machine);
|
||||
bool machine_mem_merge(MachineState *machine);
|
||||
+bool machine_require_guest_memfd(MachineState *machine);
|
||||
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
|
||||
void machine_set_cpu_numa_node(MachineState *machine,
|
||||
const CpuInstanceProperties *props,
|
||||
@@ -372,6 +373,7 @@ struct MachineState {
|
||||
char *dt_compatible;
|
||||
bool dump_guest_core;
|
||||
bool mem_merge;
|
||||
+ bool require_guest_memfd;
|
||||
bool usb;
|
||||
bool usb_disabled;
|
||||
char *firmware;
|
||||
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
|
||||
index 0e411aaa29..04b884bf42 100644
|
||||
--- a/include/sysemu/hostmem.h
|
||||
+++ b/include/sysemu/hostmem.h
|
||||
@@ -74,6 +74,7 @@ struct HostMemoryBackend {
|
||||
uint64_t size;
|
||||
bool merge, dump, use_canonical_path;
|
||||
bool prealloc, is_mapped, share, reserve;
|
||||
+ bool guest_memfd;
|
||||
uint32_t prealloc_threads;
|
||||
ThreadContext *prealloc_context;
|
||||
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,155 +0,0 @@
|
||||
From 5c639f8ce65183ce8e44ee8e0230e9d627a440d7 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Wed, 21 Feb 2024 17:00:27 +0000
|
||||
Subject: [PATCH 05/20] Implement SMBIOS type 9 v2.6
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [3/18] ead230527d93938907a561cf5b985ee4f54d82b1
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
Author: Felix Wu <flwu@google.com>
|
||||
|
||||
Signed-off-by: Felix Wu <flwu@google.com>
|
||||
Signed-off-by: Nabih Estefan <nabihestefan@google.com>
|
||||
Message-Id: <20240221170027.1027325-3-nabihestefan@google.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
|
||||
(cherry picked from commit 04f143d828845d0fd52dd4a52664d81a4f5431f7)
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/smbios/smbios.c | 49 +++++++++++++++++++++++++++++++++---
|
||||
include/hw/firmware/smbios.h | 4 +++
|
||||
qemu-options.hx | 2 +-
|
||||
3 files changed, 51 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index 4f5637d445..074705fa4c 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -124,7 +124,7 @@ static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8);
|
||||
|
||||
/* type 9 instance for parsing */
|
||||
struct type9_instance {
|
||||
- const char *slot_designation;
|
||||
+ const char *slot_designation, *pcidev;
|
||||
uint8_t slot_type, slot_data_bus_width, current_usage, slot_length,
|
||||
slot_characteristics1, slot_characteristics2;
|
||||
uint16_t slot_id;
|
||||
@@ -427,6 +427,11 @@ static const QemuOptDesc qemu_smbios_type9_opts[] = {
|
||||
.type = QEMU_OPT_NUMBER,
|
||||
.help = "slot characteristics2, see the spec",
|
||||
},
|
||||
+ {
|
||||
+ .name = "pci_device",
|
||||
+ .type = QEMU_OPT_STRING,
|
||||
+ .help = "PCI device, if provided."
|
||||
+ }
|
||||
};
|
||||
|
||||
static const QemuOptDesc qemu_smbios_type11_opts[] = {
|
||||
@@ -851,7 +856,7 @@ static void smbios_build_type_8_table(void)
|
||||
}
|
||||
}
|
||||
|
||||
-static void smbios_build_type_9_table(void)
|
||||
+static void smbios_build_type_9_table(Error **errp)
|
||||
{
|
||||
unsigned instance = 0;
|
||||
struct type9_instance *t9;
|
||||
@@ -868,6 +873,43 @@ static void smbios_build_type_9_table(void)
|
||||
t->slot_characteristics1 = t9->slot_characteristics1;
|
||||
t->slot_characteristics2 = t9->slot_characteristics2;
|
||||
|
||||
+ if (t9->pcidev) {
|
||||
+ PCIDevice *pdev = NULL;
|
||||
+ int rc = pci_qdev_find_device(t9->pcidev, &pdev);
|
||||
+ if (rc != 0) {
|
||||
+ error_setg(errp,
|
||||
+ "No PCI device %s for SMBIOS type 9 entry %s",
|
||||
+ t9->pcidev, t9->slot_designation);
|
||||
+ return;
|
||||
+ }
|
||||
+ /*
|
||||
+ * We only handle the case were the device is attached to
|
||||
+ * the PCI root bus. The general case is more complex as
|
||||
+ * bridges are enumerated later and the table would need
|
||||
+ * to be updated at this moment.
|
||||
+ */
|
||||
+ if (!pci_bus_is_root(pci_get_bus(pdev))) {
|
||||
+ error_setg(errp,
|
||||
+ "Cannot create type 9 entry for PCI device %s: "
|
||||
+ "not attached to the root bus",
|
||||
+ t9->pcidev);
|
||||
+ return;
|
||||
+ }
|
||||
+ t->segment_group_number = cpu_to_le16(0);
|
||||
+ t->bus_number = pci_dev_bus_num(pdev);
|
||||
+ t->device_number = pdev->devfn;
|
||||
+ } else {
|
||||
+ /*
|
||||
+ * Per SMBIOS spec, For slots that are not of the PCI, AGP, PCI-X,
|
||||
+ * or PCI-Express type that do not have bus/device/function
|
||||
+ * information, 0FFh should be populated in the fields of Segment
|
||||
+ * Group Number, Bus Number, Device/Function Number.
|
||||
+ */
|
||||
+ t->segment_group_number = 0xff;
|
||||
+ t->bus_number = 0xff;
|
||||
+ t->device_number = 0xff;
|
||||
+ }
|
||||
+
|
||||
SMBIOS_BUILD_TABLE_POST;
|
||||
instance++;
|
||||
}
|
||||
@@ -1222,7 +1264,7 @@ void smbios_get_tables(MachineState *ms,
|
||||
}
|
||||
|
||||
smbios_build_type_8_table();
|
||||
- smbios_build_type_9_table();
|
||||
+ smbios_build_type_9_table(errp);
|
||||
smbios_build_type_11_table();
|
||||
|
||||
#define MAX_DIMM_SZ (16 * GiB)
|
||||
@@ -1568,6 +1610,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
|
||||
t->slot_id = qemu_opt_get_number(opts, "slot_id", 0);
|
||||
t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0);
|
||||
t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0);
|
||||
+ save_opt(&t->pcidev, opts, "pcidev");
|
||||
QTAILQ_INSERT_TAIL(&type9, t, next);
|
||||
return;
|
||||
}
|
||||
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
|
||||
index 6bbd5a4c20..f8dd07fe4c 100644
|
||||
--- a/include/hw/firmware/smbios.h
|
||||
+++ b/include/hw/firmware/smbios.h
|
||||
@@ -222,6 +222,10 @@ struct smbios_type_9 {
|
||||
uint16_t slot_id;
|
||||
uint8_t slot_characteristics1;
|
||||
uint8_t slot_characteristics2;
|
||||
+ /* SMBIOS spec v2.6+ */
|
||||
+ uint16_t segment_group_number;
|
||||
+ uint8_t bus_number;
|
||||
+ uint8_t device_number;
|
||||
} QEMU_PACKED;
|
||||
|
||||
/* SMBIOS type 11 - OEM strings */
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 94cacc2c63..93364e1765 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -2710,7 +2710,7 @@ SRST
|
||||
``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
|
||||
Specify SMBIOS type 4 fields
|
||||
|
||||
-``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]``
|
||||
+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d][,pci_device=str]``
|
||||
Specify SMBIOS type 9 fields
|
||||
|
||||
``-smbios type=11[,value=str][,path=filename]``
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,218 +0,0 @@
|
||||
From 84fc607d678bd72397a41d706e91fa241fd97266 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Wed, 21 Feb 2024 17:00:26 +0000
|
||||
Subject: [PATCH 04/20] Implement base of SMBIOS type 9 descriptor.
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [2/18] 2678cc080bfbf3357fa2f94ceaf42fc61b690d32
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
|
||||
commit: 735eee07d1f963635d3c3bf9f5e4bf1bc000870e
|
||||
Author: Felix Wu <flwu@google.com>
|
||||
|
||||
Version 2.1+.
|
||||
|
||||
Signed-off-by: Felix Wu <flwu@google.com>
|
||||
Signed-off-by: Nabih Estefan <nabihestefan@google.com>
|
||||
Message-Id: <20240221170027.1027325-2-nabihestefan@google.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/smbios/smbios.c | 99 ++++++++++++++++++++++++++++++++++++
|
||||
include/hw/firmware/smbios.h | 13 +++++
|
||||
qemu-options.hx | 3 ++
|
||||
3 files changed, 115 insertions(+)
|
||||
|
||||
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
|
||||
index 7bde23e59d..4f5637d445 100644
|
||||
--- a/hw/smbios/smbios.c
|
||||
+++ b/hw/smbios/smbios.c
|
||||
@@ -122,6 +122,16 @@ struct type8_instance {
|
||||
};
|
||||
static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8);
|
||||
|
||||
+/* type 9 instance for parsing */
|
||||
+struct type9_instance {
|
||||
+ const char *slot_designation;
|
||||
+ uint8_t slot_type, slot_data_bus_width, current_usage, slot_length,
|
||||
+ slot_characteristics1, slot_characteristics2;
|
||||
+ uint16_t slot_id;
|
||||
+ QTAILQ_ENTRY(type9_instance) next;
|
||||
+};
|
||||
+static QTAILQ_HEAD(, type9_instance) type9 = QTAILQ_HEAD_INITIALIZER(type9);
|
||||
+
|
||||
static struct {
|
||||
size_t nvalues;
|
||||
char **values;
|
||||
@@ -371,6 +381,54 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = {
|
||||
},
|
||||
};
|
||||
|
||||
+static const QemuOptDesc qemu_smbios_type9_opts[] = {
|
||||
+ {
|
||||
+ .name = "type",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "SMBIOS element type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_designation",
|
||||
+ .type = QEMU_OPT_STRING,
|
||||
+ .help = "string number for reference designation",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_type",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "connector type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_data_bus_width",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "port type",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "current_usage",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "current usage",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_length",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "system slot length",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_id",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "system slot id",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_characteristics1",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "slot characteristics1, see the spec",
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "slot_characteristics2",
|
||||
+ .type = QEMU_OPT_NUMBER,
|
||||
+ .help = "slot characteristics2, see the spec",
|
||||
+ },
|
||||
+};
|
||||
+
|
||||
static const QemuOptDesc qemu_smbios_type11_opts[] = {
|
||||
{
|
||||
.name = "value",
|
||||
@@ -594,6 +652,7 @@ bool smbios_skip_table(uint8_t type, bool required_table)
|
||||
#define T2_BASE 0x200
|
||||
#define T3_BASE 0x300
|
||||
#define T4_BASE 0x400
|
||||
+#define T9_BASE 0x900
|
||||
#define T11_BASE 0xe00
|
||||
|
||||
#define T16_BASE 0x1000
|
||||
@@ -792,6 +851,28 @@ static void smbios_build_type_8_table(void)
|
||||
}
|
||||
}
|
||||
|
||||
+static void smbios_build_type_9_table(void)
|
||||
+{
|
||||
+ unsigned instance = 0;
|
||||
+ struct type9_instance *t9;
|
||||
+
|
||||
+ QTAILQ_FOREACH(t9, &type9, next) {
|
||||
+ SMBIOS_BUILD_TABLE_PRE(9, T9_BASE + instance, true);
|
||||
+
|
||||
+ SMBIOS_TABLE_SET_STR(9, slot_designation, t9->slot_designation);
|
||||
+ t->slot_type = t9->slot_type;
|
||||
+ t->slot_data_bus_width = t9->slot_data_bus_width;
|
||||
+ t->current_usage = t9->current_usage;
|
||||
+ t->slot_length = t9->slot_length;
|
||||
+ t->slot_id = t9->slot_id;
|
||||
+ t->slot_characteristics1 = t9->slot_characteristics1;
|
||||
+ t->slot_characteristics2 = t9->slot_characteristics2;
|
||||
+
|
||||
+ SMBIOS_BUILD_TABLE_POST;
|
||||
+ instance++;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void smbios_build_type_11_table(void)
|
||||
{
|
||||
char count_str[128];
|
||||
@@ -1141,6 +1222,7 @@ void smbios_get_tables(MachineState *ms,
|
||||
}
|
||||
|
||||
smbios_build_type_8_table();
|
||||
+ smbios_build_type_9_table();
|
||||
smbios_build_type_11_table();
|
||||
|
||||
#define MAX_DIMM_SZ (16 * GiB)
|
||||
@@ -1472,6 +1554,23 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
|
||||
t8_i->port_type = qemu_opt_get_number(opts, "port_type", 0);
|
||||
QTAILQ_INSERT_TAIL(&type8, t8_i, next);
|
||||
return;
|
||||
+ case 9: {
|
||||
+ if (!qemu_opts_validate(opts, qemu_smbios_type9_opts, errp)) {
|
||||
+ return;
|
||||
+ }
|
||||
+ struct type9_instance *t;
|
||||
+ t = g_new0(struct type9_instance, 1);
|
||||
+ save_opt(&t->slot_designation, opts, "slot_designation");
|
||||
+ t->slot_type = qemu_opt_get_number(opts, "slot_type", 0);
|
||||
+ t->slot_data_bus_width = qemu_opt_get_number(opts, "slot_data_bus_width", 0);
|
||||
+ t->current_usage = qemu_opt_get_number(opts, "current_usage", 0);
|
||||
+ t->slot_length = qemu_opt_get_number(opts, "slot_length", 0);
|
||||
+ t->slot_id = qemu_opt_get_number(opts, "slot_id", 0);
|
||||
+ t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0);
|
||||
+ t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0);
|
||||
+ QTAILQ_INSERT_TAIL(&type9, t, next);
|
||||
+ return;
|
||||
+ }
|
||||
case 11:
|
||||
if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) {
|
||||
return;
|
||||
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
|
||||
index d24b3ccd32..6bbd5a4c20 100644
|
||||
--- a/include/hw/firmware/smbios.h
|
||||
+++ b/include/hw/firmware/smbios.h
|
||||
@@ -211,6 +211,19 @@ struct smbios_type_8 {
|
||||
uint8_t port_type;
|
||||
} QEMU_PACKED;
|
||||
|
||||
+/* SMBIOS type 9 - System Slots (v2.1+) */
|
||||
+struct smbios_type_9 {
|
||||
+ struct smbios_structure_header header;
|
||||
+ uint8_t slot_designation;
|
||||
+ uint8_t slot_type;
|
||||
+ uint8_t slot_data_bus_width;
|
||||
+ uint8_t current_usage;
|
||||
+ uint8_t slot_length;
|
||||
+ uint16_t slot_id;
|
||||
+ uint8_t slot_characteristics1;
|
||||
+ uint8_t slot_characteristics2;
|
||||
+} QEMU_PACKED;
|
||||
+
|
||||
/* SMBIOS type 11 - OEM strings */
|
||||
struct smbios_type_11 {
|
||||
struct smbios_structure_header header;
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 0814f43066..94cacc2c63 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -2710,6 +2710,9 @@ SRST
|
||||
``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
|
||||
Specify SMBIOS type 4 fields
|
||||
|
||||
+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]``
|
||||
+ Specify SMBIOS type 9 fields
|
||||
+
|
||||
``-smbios type=11[,value=str][,path=filename]``
|
||||
Specify SMBIOS type 11 fields
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,203 @@
|
||||
From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:41:33 -0400
|
||||
Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Board reset requires writing a fresh CPU state. As far as KVM is
|
||||
concerned, the only thing that blocks reset is that CPU state is
|
||||
encrypted; therefore, kvm_cpus_are_resettable() can simply check
|
||||
if that is the case.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-accel-ops.c | 2 +-
|
||||
accel/kvm/kvm-all.c | 5 -----
|
||||
include/sysemu/kvm.h | 10 ----------
|
||||
target/arm/kvm.c | 5 -----
|
||||
target/i386/kvm/kvm.c | 5 -----
|
||||
target/loongarch/kvm/kvm.c | 5 -----
|
||||
target/mips/kvm.c | 5 -----
|
||||
target/ppc/kvm.c | 5 -----
|
||||
target/riscv/kvm/kvm-cpu.c | 5 -----
|
||||
target/s390x/kvm/kvm.c | 5 -----
|
||||
10 files changed, 1 insertion(+), 51 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
|
||||
index b3c946dc4b..74e3c5785b 100644
|
||||
--- a/accel/kvm/kvm-accel-ops.c
|
||||
+++ b/accel/kvm/kvm-accel-ops.c
|
||||
@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu)
|
||||
|
||||
static bool kvm_cpus_are_resettable(void)
|
||||
{
|
||||
- return !kvm_enabled() || kvm_cpu_check_are_resettable();
|
||||
+ return !kvm_enabled() || !kvm_state->guest_state_protected;
|
||||
}
|
||||
|
||||
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index ec0f6df7c5..b51e09a583 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void)
|
||||
s->coalesced_flush_in_progress = false;
|
||||
}
|
||||
|
||||
-bool kvm_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return kvm_arch_cpu_check_are_resettable();
|
||||
-}
|
||||
-
|
||||
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
{
|
||||
if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index 302e8f6f1e..54f4d83a37 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
|
||||
/* Notify resamplefd for EOI of specific interrupts. */
|
||||
void kvm_resample_fd_notify(int gsi);
|
||||
|
||||
-/**
|
||||
- * kvm_cpu_check_are_resettable - return whether CPUs can be reset
|
||||
- *
|
||||
- * Returns: true: CPUs are resettable
|
||||
- * false: CPUs are not resettable
|
||||
- */
|
||||
-bool kvm_cpu_check_are_resettable(void);
|
||||
-
|
||||
-bool kvm_arch_cpu_check_are_resettable(void);
|
||||
-
|
||||
bool kvm_dirty_ring_enabled(void);
|
||||
|
||||
uint32_t kvm_dirty_ring_size(void);
|
||||
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
|
||||
index ab85d628a8..21ebbf3b8f 100644
|
||||
--- a/target/arm/kvm.c
|
||||
+++ b/target/arm/kvm.c
|
||||
@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
|
||||
return (data - 32) & 0xffff;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index e271652620..a12207a8ee 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void)
|
||||
return has_msr_umwait;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return !sev_es_enabled();
|
||||
-}
|
||||
-
|
||||
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
|
||||
|
||||
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
|
||||
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
|
||||
index d630cc39cb..8224d94333 100644
|
||||
--- a/target/loongarch/kvm/kvm.c
|
||||
+++ b/target/loongarch/kvm/kvm.c
|
||||
@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
|
||||
return true;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
{
|
||||
int ret = 0;
|
||||
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
|
||||
index 6c52e59f55..a631ab544f 100644
|
||||
--- a/target/mips/kvm.c
|
||||
+++ b/target/mips/kvm.c
|
||||
@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine)
|
||||
return -1;
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
void kvm_arch_accel_class_init(ObjectClass *oc)
|
||||
{
|
||||
}
|
||||
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
|
||||
index 59f640cf7b..9d9d9f0d79 100644
|
||||
--- a/target/ppc/kvm.c
|
||||
+++ b/target/ppc/kvm.c
|
||||
@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
|
||||
}
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
void kvm_arch_accel_class_init(ObjectClass *oc)
|
||||
{
|
||||
}
|
||||
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
|
||||
index 6a6c6cae80..49d2f3ad58 100644
|
||||
--- a/target/riscv/kvm/kvm-cpu.c
|
||||
+++ b/target/riscv/kvm/kvm-cpu.c
|
||||
@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
|
||||
}
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
static int aia_mode;
|
||||
|
||||
static const char *kvm_aia_mode_str(uint64_t mode)
|
||||
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
|
||||
index 55fb4855b1..4db59658e1 100644
|
||||
--- a/target/s390x/kvm/kvm.c
|
||||
+++ b/target/s390x/kvm/kvm.c
|
||||
@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
|
||||
kvm_s390_vcpu_interrupt(cpu, &irq);
|
||||
}
|
||||
|
||||
-bool kvm_arch_cpu_check_are_resettable(void)
|
||||
-{
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
int kvm_s390_get_zpci_op(void)
|
||||
{
|
||||
return cap_zpci_op;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:41:10 -0400
|
||||
Subject: [PATCH 019/100] KVM: track whether guest state is encrypted
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the
|
||||
guest state is encrypted, in which case they do nothing. For the new
|
||||
API using VM types, instead, the ioctls will fail which is a safer and
|
||||
more robust approach.
|
||||
|
||||
The new API will be the only one available for SEV-SNP and TDX, but it
|
||||
is also usable for SEV and SEV-ES. In preparation for that, require
|
||||
architecture-specific KVM code to communicate the point at which guest
|
||||
state is protected (which must be after kvm_cpu_synchronize_post_init(),
|
||||
though that might change in the future in order to suppor migration).
|
||||
From that point, skip reading registers so that cpu->vcpu_dirty is
|
||||
never true: if it ever becomes true, kvm_arch_put_registers() will
|
||||
fail miserably.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 17 ++++++++++++++---
|
||||
include/sysemu/kvm.h | 2 ++
|
||||
include/sysemu/kvm_int.h | 1 +
|
||||
target/i386/sev.c | 1 +
|
||||
4 files changed, 18 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 931f74256e..ec0f6df7c5 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void)
|
||||
|
||||
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
{
|
||||
- if (!cpu->vcpu_dirty) {
|
||||
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
int ret = kvm_arch_get_registers(cpu);
|
||||
if (ret) {
|
||||
error_report("Failed to get registers: %s", strerror(-ret));
|
||||
@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
||||
|
||||
void kvm_cpu_synchronize_state(CPUState *cpu)
|
||||
{
|
||||
- if (!cpu->vcpu_dirty) {
|
||||
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
|
||||
run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
|
||||
}
|
||||
}
|
||||
@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
|
||||
|
||||
void kvm_cpu_synchronize_post_init(CPUState *cpu)
|
||||
{
|
||||
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
|
||||
+ if (!kvm_state->guest_state_protected) {
|
||||
+ /*
|
||||
+ * This runs before the machine_init_done notifiers, and is the last
|
||||
+ * opportunity to synchronize the state of confidential guests.
|
||||
+ */
|
||||
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
|
||||
@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
|
||||
query_stats_schema_vcpu(first_cpu, &stats_args);
|
||||
}
|
||||
}
|
||||
+
|
||||
+void kvm_mark_guest_state_protected(void)
|
||||
+{
|
||||
+ kvm_state->guest_state_protected = true;
|
||||
+}
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index fad9a7e8ff..302e8f6f1e 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void);
|
||||
|
||||
uint32_t kvm_dirty_ring_size(void);
|
||||
|
||||
+void kvm_mark_guest_state_protected(void);
|
||||
+
|
||||
/**
|
||||
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
|
||||
* reported for the VM.
|
||||
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||
index 882e37e12c..3496be7997 100644
|
||||
--- a/include/sysemu/kvm_int.h
|
||||
+++ b/include/sysemu/kvm_int.h
|
||||
@@ -87,6 +87,7 @@ struct KVMState
|
||||
bool kernel_irqchip_required;
|
||||
OnOffAuto kernel_irqchip_split;
|
||||
bool sync_mmu;
|
||||
+ bool guest_state_protected;
|
||||
uint64_t manual_dirty_log_protect;
|
||||
/* The man page (and posix) say ioctl numbers are signed int, but
|
||||
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index b8f79d34d1..c49a8fd55e 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
|
||||
if (ret) {
|
||||
exit(1);
|
||||
}
|
||||
+ kvm_mark_guest_state_protected();
|
||||
}
|
||||
|
||||
/* query the measurement blob length */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,329 @@
|
||||
From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 20 Mar 2024 03:39:02 -0500
|
||||
Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add KVM guest_memfd support to RAMBlock so both normal hva based memory
|
||||
and kvm guest memfd based private memory can be associated in one RAMBlock.
|
||||
|
||||
Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to
|
||||
create private guest_memfd during RAMBlock setup.
|
||||
|
||||
Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd
|
||||
is more flexible and extensible than simply relying on the VM type because
|
||||
in the future we may have the case that not all the memory of a VM need
|
||||
guest memfd. As a benefit, it also avoid getting MachineState in memory
|
||||
subsystem.
|
||||
|
||||
Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of
|
||||
confidential guests, such as TDX VM. How and when to set it for memory
|
||||
backends will be implemented in the following patches.
|
||||
|
||||
Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has
|
||||
KVM guest_memfd allocated.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Message-ID: <20240320083945.991426-7-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++
|
||||
accel/stubs/kvm-stub.c | 5 +++++
|
||||
include/exec/memory.h | 20 +++++++++++++++++---
|
||||
include/exec/ram_addr.h | 2 +-
|
||||
include/exec/ramblock.h | 1 +
|
||||
include/sysemu/kvm.h | 2 ++
|
||||
system/memory.c | 5 +++++
|
||||
system/physmem.c | 34 +++++++++++++++++++++++++++++++---
|
||||
8 files changed, 90 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 272e945f52..a7b9a127dd 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug;
|
||||
static int kvm_sstep_flags;
|
||||
static bool kvm_immediate_exit;
|
||||
static uint64_t kvm_supported_memory_attributes;
|
||||
+static bool kvm_guest_memfd_supported;
|
||||
static hwaddr kvm_max_slot_size = ~0;
|
||||
|
||||
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
||||
@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms)
|
||||
}
|
||||
|
||||
kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
|
||||
+ kvm_guest_memfd_supported =
|
||||
+ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
|
||||
+ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
|
||||
+ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
||||
+
|
||||
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
|
||||
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
||||
|
||||
@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void)
|
||||
{
|
||||
kvm_state->guest_state_protected = true;
|
||||
}
|
||||
+
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
+{
|
||||
+ int fd;
|
||||
+ struct kvm_create_guest_memfd guest_memfd = {
|
||||
+ .size = size,
|
||||
+ .flags = flags,
|
||||
+ };
|
||||
+
|
||||
+ if (!kvm_guest_memfd_supported) {
|
||||
+ error_setg(errp, "KVM does not support guest_memfd");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
|
||||
+ if (fd < 0) {
|
||||
+ error_setg_errno(errp, errno, "Error creating KVM guest_memfd");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return fd;
|
||||
+}
|
||||
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
|
||||
index ca38172884..8e0eb22e61 100644
|
||||
--- a/accel/stubs/kvm-stub.c
|
||||
+++ b/accel/stubs/kvm-stub.c
|
||||
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
+
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
+{
|
||||
+ return -ENOSYS;
|
||||
+}
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 8626a355b3..679a847685 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM FD is opened read-only */
|
||||
#define RAM_READONLY_FD (1 << 11)
|
||||
|
||||
+/* RAM can be private that has kvm guest memfd backend */
|
||||
+#define RAM_GUEST_MEMFD (1 << 12)
|
||||
+
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
|
||||
* @name: Region name, becomes part of RAMBlock name used in migration stream
|
||||
* must be unique within any device
|
||||
* @size: size of the region.
|
||||
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
|
||||
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
|
||||
+ * RAM_GUEST_MEMFD.
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
*
|
||||
* Note that this function does not do anything to cause the data in the
|
||||
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
|
||||
* (getpagesize()) will be used.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @path: the path in which to allocate the RAM.
|
||||
* @offset: offset within the file referenced by path
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
* @size: size of the region.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @fd: the fd to mmap.
|
||||
* @offset: offset within the file referenced by fd
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
|
||||
*/
|
||||
bool memory_region_is_protected(MemoryRegion *mr);
|
||||
|
||||
+/**
|
||||
+ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
|
||||
+ * associated
|
||||
+ *
|
||||
+ * Returns %true if a memory region's ram_block has valid guest_memfd assigned.
|
||||
+ *
|
||||
+ * @mr: the memory region being queried
|
||||
+ */
|
||||
+bool memory_region_has_guest_memfd(MemoryRegion *mr);
|
||||
+
|
||||
/**
|
||||
* memory_region_get_iommu: check whether a memory region is an iommu
|
||||
*
|
||||
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
|
||||
index de45ba7bc9..07c8f86375 100644
|
||||
--- a/include/exec/ram_addr.h
|
||||
+++ b/include/exec/ram_addr.h
|
||||
@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void);
|
||||
* @mr: the memory region where the ram block is
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
- * RAM_READONLY_FD
|
||||
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @mem_path or @fd: specify the backing file or device
|
||||
* @offset: Offset into target file
|
||||
* @errp: pointer to Error*, to store an error if it happens
|
||||
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
|
||||
index 848915ea5b..459c8917de 100644
|
||||
--- a/include/exec/ramblock.h
|
||||
+++ b/include/exec/ramblock.h
|
||||
@@ -41,6 +41,7 @@ struct RAMBlock {
|
||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
||||
int fd;
|
||||
uint64_t fd_offset;
|
||||
+ int guest_memfd;
|
||||
size_t page_size;
|
||||
/* dirty bitmap used during migration */
|
||||
unsigned long *bmap;
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index f114ff6986..9e4ab7ae89 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void);
|
||||
*/
|
||||
bool kvm_hwpoisoned_mem(void);
|
||||
|
||||
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
|
||||
+
|
||||
int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
|
||||
int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
|
||||
|
||||
diff --git a/system/memory.c b/system/memory.c
|
||||
index a229a79988..c756950c0c 100644
|
||||
--- a/system/memory.c
|
||||
+++ b/system/memory.c
|
||||
@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
|
||||
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
|
||||
}
|
||||
|
||||
+bool memory_region_has_guest_memfd(MemoryRegion *mr)
|
||||
+{
|
||||
+ return mr->ram_block && mr->ram_block->guest_memfd >= 0;
|
||||
+}
|
||||
+
|
||||
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
||||
{
|
||||
uint8_t mask = mr->dirty_log_mask;
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index a4fe3d2bf8..f5dfa20e57 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
const bool shared = qemu_ram_is_shared(new_block);
|
||||
RAMBlock *block;
|
||||
RAMBlock *last_block = NULL;
|
||||
+ bool free_on_error = false;
|
||||
ram_addr_t old_ram_size, new_ram_size;
|
||||
Error *err = NULL;
|
||||
|
||||
@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
return;
|
||||
}
|
||||
memory_try_enable_merging(new_block->host, new_block->max_length);
|
||||
+ free_on_error = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (new_block->flags & RAM_GUEST_MEMFD) {
|
||||
+ assert(kvm_enabled());
|
||||
+ assert(new_block->guest_memfd < 0);
|
||||
+
|
||||
+ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
+ 0, errp);
|
||||
+ if (new_block->guest_memfd < 0) {
|
||||
+ qemu_mutex_unlock_ramlist();
|
||||
+ goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
ram_block_notify_add(new_block->host, new_block->used_length,
|
||||
new_block->max_length);
|
||||
}
|
||||
+ return;
|
||||
+
|
||||
+out_free:
|
||||
+ if (free_on_error) {
|
||||
+ qemu_anon_ram_free(new_block->host, new_block->max_length);
|
||||
+ new_block->host = NULL;
|
||||
+ }
|
||||
}
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
/* Just support these ram flags by now. */
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
||||
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
||||
- RAM_READONLY_FD)) == 0);
|
||||
+ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
||||
|
||||
if (xen_enabled()) {
|
||||
error_setg(errp, "-mem-path not supported with Xen");
|
||||
@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
new_block->used_length = size;
|
||||
new_block->max_length = size;
|
||||
new_block->flags = ram_flags;
|
||||
+ new_block->guest_memfd = -1;
|
||||
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
||||
errp);
|
||||
if (!new_block->host) {
|
||||
@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
int align;
|
||||
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
|
||||
- RAM_NORESERVE)) == 0);
|
||||
+ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
||||
assert(!host ^ (ram_flags & RAM_PREALLOC));
|
||||
|
||||
align = qemu_real_host_page_size();
|
||||
@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
new_block->max_length = max_size;
|
||||
assert(max_size >= size);
|
||||
new_block->fd = -1;
|
||||
+ new_block->guest_memfd = -1;
|
||||
new_block->page_size = qemu_real_host_page_size();
|
||||
new_block->host = host;
|
||||
new_block->flags = ram_flags;
|
||||
@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
||||
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
|
||||
MemoryRegion *mr, Error **errp)
|
||||
{
|
||||
- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
|
||||
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
||||
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
|
||||
}
|
||||
|
||||
@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block)
|
||||
} else {
|
||||
qemu_anon_ram_free(block->host, block->max_length);
|
||||
}
|
||||
+
|
||||
+ if (block->guest_memfd >= 0) {
|
||||
+ close(block->guest_memfd);
|
||||
+ }
|
||||
+
|
||||
g_free(block);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,82 @@
|
||||
From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 17:45:29 +0100
|
||||
Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated
|
||||
discard
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Some subsystems like VFIO might disable ram block discard, but guest_memfd
|
||||
uses discard operations to implement conversions between private and
|
||||
shared memory. Because of this, sequences like the following can result
|
||||
in stale IOMMU mappings:
|
||||
|
||||
1. allocate shared page
|
||||
2. convert page shared->private
|
||||
3. discard shared page
|
||||
4. convert page private->shared
|
||||
5. allocate shared page
|
||||
6. issue DMA operations against that shared page
|
||||
|
||||
This is not a use-after-free, because after step 3 VFIO is still pinning
|
||||
the page. However, DMA operations in step 6 will hit the old mapping
|
||||
that was allocated in step 1.
|
||||
|
||||
Address this by taking ram_block_discard_is_enabled() into account when
|
||||
deciding whether or not to discard pages.
|
||||
|
||||
Since kvm_convert_memory()/guest_memfd doesn't implement a
|
||||
RamDiscardManager handler to convey and replay discard operations,
|
||||
this is a case of uncoordinated discard, which is blocked/released
|
||||
by ram_block_discard_require(). Interestingly, this function had
|
||||
no use so far.
|
||||
|
||||
Alternative approaches would be to block discard of shared pages, but
|
||||
this would cause guests to consume twice the memory if they use VFIO;
|
||||
or to implement a RamDiscardManager and only block uncoordinated
|
||||
discard, i.e. use ram_block_coordinated_discard_require().
|
||||
|
||||
[Commit message mostly by Michael Roth <michael.roth@amd.com>]
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
system/physmem.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index f5dfa20e57..5ebcf5be11 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
assert(kvm_enabled());
|
||||
assert(new_block->guest_memfd < 0);
|
||||
|
||||
+ if (ram_block_discard_require(true) < 0) {
|
||||
+ error_setg_errno(errp, errno,
|
||||
+ "cannot set up private guest memory: discard currently blocked");
|
||||
+ error_append_hint(errp, "Are you using assigned devices?\n");
|
||||
+ goto out_free;
|
||||
+ }
|
||||
+
|
||||
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
0, errp);
|
||||
if (new_block->guest_memfd < 0) {
|
||||
@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block)
|
||||
|
||||
if (block->guest_memfd >= 0) {
|
||||
close(block->guest_memfd);
|
||||
+ ram_block_discard_require(false);
|
||||
}
|
||||
|
||||
g_free(block);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 13:03:19 +0000
|
||||
Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending
|
||||
too much data to the backend"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu)
|
||||
|
||||
This commit results in unexpected termination of the TLS connection.
|
||||
When 'fd_can_read' returns 0, the code goes on to pass a zero length
|
||||
buffer to qio_channel_read. The TLS impl calls into gnutls_recv()
|
||||
with this zero length buffer, at which point GNUTLS returns an error
|
||||
GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code
|
||||
resulting in the connection being torn down by the chardev.
|
||||
|
||||
Simply skipping the qio_channel_read when the buffer length is zero
|
||||
is also not satisfactory, as it results in a high CPU burn busy loop
|
||||
massively slowing QEMU's functionality.
|
||||
|
||||
The proper solution is to avoid tcp_chr_read being called at all
|
||||
unless the frontend is able to accept more data. This will be done
|
||||
in a followup commit.
|
||||
|
||||
This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05
|
||||
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864)
|
||||
---
|
||||
chardev/char-socket.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index f48d341ebc..51d0943fce 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
||||
s->max_size <= 0) {
|
||||
return TRUE;
|
||||
}
|
||||
- len = tcp_chr_read_poll(opaque);
|
||||
- if (len > sizeof(buf)) {
|
||||
- len = sizeof(buf);
|
||||
+ len = sizeof(buf);
|
||||
+ if (len > s->max_size) {
|
||||
+ len = s->max_size;
|
||||
}
|
||||
size = tcp_chr_recv(chr, (void *)buf, len);
|
||||
if (size == 0 || (size == -1 && errno != EAGAIN)) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,216 +0,0 @@
|
||||
From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 17:08:30 +0000
|
||||
Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu)
|
||||
|
||||
This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90,
|
||||
and add comments to explain why child sources cannot be used.
|
||||
|
||||
When a GSource is added as a child of another GSource, if its
|
||||
'prepare' function indicates readiness, then the parent's
|
||||
'prepare' function will never be run. The io_watch_poll_prepare
|
||||
absolutely *must* be run on every iteration of the main loop,
|
||||
to ensure that the chardev backend doesn't feed data to the
|
||||
frontend that it is unable to consume.
|
||||
|
||||
At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made,
|
||||
all the child GSource impls were relying on poll'ing an FD,
|
||||
so their 'prepare' functions would never indicate readiness
|
||||
ahead of poll() being invoked. So the buggy behaviour was
|
||||
not noticed and lay dormant.
|
||||
|
||||
Relatively recently the QIOChannelTLS impl introduced a
|
||||
level 2 child GSource, which checks with GNUTLS whether it
|
||||
has cached any data that was decoded but not yet consumed:
|
||||
|
||||
commit ffda5db65aef42266a5053a4be34515106c4c7ee
|
||||
Author: Antoine Damhet <antoine.damhet@shadow.tech>
|
||||
Date: Tue Nov 15 15:23:29 2022 +0100
|
||||
|
||||
io/channel-tls: fix handling of bigger read buffers
|
||||
|
||||
Since the TLS backend can read more data from the underlying QIOChannel
|
||||
we introduce a minimal child GSource to notify if we still have more
|
||||
data available to be read.
|
||||
|
||||
Signed-off-by: Antoine Damhet <antoine.damhet@shadow.tech>
|
||||
Signed-off-by: Charles Frey <charles.frey@shadow.tech>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
|
||||
With this, it is now quite common for the 'prepare' function
|
||||
on a QIOChannelTLS GSource to indicate immediate readiness,
|
||||
bypassing the parent GSource 'prepare' function. IOW, the
|
||||
critical 'io_watch_poll_prepare' is being skipped on some
|
||||
iterations of the main loop. As a result chardev frontend
|
||||
asserts are now being triggered as they are fed data they
|
||||
are not ready to consume.
|
||||
|
||||
A reproducer is as follows:
|
||||
|
||||
* In terminal 1 run a GNUTLS *echo* server
|
||||
|
||||
$ gnutls-serv --echo \
|
||||
--x509cafile ca-cert.pem \
|
||||
--x509keyfile server-key.pem \
|
||||
--x509certfile server-cert.pem \
|
||||
-p 9000
|
||||
|
||||
* In terminal 2 run a QEMU guest
|
||||
|
||||
$ qemu-system-s390x \
|
||||
-nodefaults \
|
||||
-display none \
|
||||
-object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \
|
||||
-chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \
|
||||
-device sclpconsole,chardev=con0 \
|
||||
-hda Fedora-Cloud-Base-39-1.5.s390x.qcow2
|
||||
|
||||
After the previous patch revert, but before this patch revert,
|
||||
this scenario will crash:
|
||||
|
||||
qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
|
||||
`size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.
|
||||
|
||||
This assert indicates that 'tcp_chr_read' was called without
|
||||
'tcp_chr_read_poll' having first been checked for ability to
|
||||
receive more data
|
||||
|
||||
QEMU's use of a 'prepare' function to create/delete another
|
||||
GSource is rather a hack and not normally the kind of thing that
|
||||
is expected to be done by a GSource. There is no mechanism to
|
||||
force GLib to always run the 'prepare' function of a parent
|
||||
GSource. The best option is to simply not use the child source
|
||||
concept, and go back to the functional approach previously
|
||||
relied on.
|
||||
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Tested-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de)
|
||||
---
|
||||
chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 51 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-io.c b/chardev/char-io.c
|
||||
index 4451128cba..dab77b112e 100644
|
||||
--- a/chardev/char-io.c
|
||||
+++ b/chardev/char-io.c
|
||||
@@ -33,6 +33,7 @@ typedef struct IOWatchPoll {
|
||||
IOCanReadHandler *fd_can_read;
|
||||
GSourceFunc fd_read;
|
||||
void *opaque;
|
||||
+ GMainContext *context;
|
||||
} IOWatchPoll;
|
||||
|
||||
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
||||
@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * We do not register the QIOChannel watch as a child GSource.
|
||||
+ * The 'prepare' function on the parent GSource will be
|
||||
+ * skipped if a child GSource's 'prepare' function indicates
|
||||
+ * readiness. We need this prepare function be guaranteed
|
||||
+ * to run on *every* iteration of the main loop, because
|
||||
+ * it is critical to ensure we remove the QIOChannel watch
|
||||
+ * if 'fd_can_read' indicates the frontend cannot receive
|
||||
+ * more data.
|
||||
+ */
|
||||
if (now_active) {
|
||||
iwp->src = qio_channel_create_watch(
|
||||
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
|
||||
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
|
||||
- g_source_add_child_source(source, iwp->src);
|
||||
- g_source_unref(iwp->src);
|
||||
+ g_source_attach(iwp->src, iwp->context);
|
||||
} else {
|
||||
- g_source_remove_child_source(source, iwp->src);
|
||||
+ g_source_destroy(iwp->src);
|
||||
+ g_source_unref(iwp->src);
|
||||
iwp->src = NULL;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
+static gboolean io_watch_poll_check(GSource *source)
|
||||
+{
|
||||
+ return FALSE;
|
||||
+}
|
||||
+
|
||||
static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
|
||||
gpointer user_data)
|
||||
{
|
||||
- return G_SOURCE_CONTINUE;
|
||||
+ abort();
|
||||
+}
|
||||
+
|
||||
+static void io_watch_poll_finalize(GSource *source)
|
||||
+{
|
||||
+ /*
|
||||
+ * Due to a glib bug, removing the last reference to a source
|
||||
+ * inside a finalize callback causes recursive locking (and a
|
||||
+ * deadlock). This is not a problem inside other callbacks,
|
||||
+ * including dispatch callbacks, so we call io_remove_watch_poll
|
||||
+ * to remove this source. At this point, iwp->src must
|
||||
+ * be NULL, or we would leak it.
|
||||
+ */
|
||||
+ IOWatchPoll *iwp = io_watch_poll_from_source(source);
|
||||
+ assert(iwp->src == NULL);
|
||||
}
|
||||
|
||||
static GSourceFuncs io_watch_poll_funcs = {
|
||||
.prepare = io_watch_poll_prepare,
|
||||
+ .check = io_watch_poll_check,
|
||||
.dispatch = io_watch_poll_dispatch,
|
||||
+ .finalize = io_watch_poll_finalize,
|
||||
};
|
||||
|
||||
GSource *io_add_watch_poll(Chardev *chr,
|
||||
@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr,
|
||||
iwp->ioc = ioc;
|
||||
iwp->fd_read = (GSourceFunc) fd_read;
|
||||
iwp->src = NULL;
|
||||
+ iwp->context = context;
|
||||
|
||||
name = g_strdup_printf("chardev-iowatch-%s", chr->label);
|
||||
g_source_set_name((GSource *)iwp, name);
|
||||
@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr,
|
||||
return (GSource *)iwp;
|
||||
}
|
||||
|
||||
+static void io_remove_watch_poll(GSource *source)
|
||||
+{
|
||||
+ IOWatchPoll *iwp;
|
||||
+
|
||||
+ iwp = io_watch_poll_from_source(source);
|
||||
+ if (iwp->src) {
|
||||
+ g_source_destroy(iwp->src);
|
||||
+ g_source_unref(iwp->src);
|
||||
+ iwp->src = NULL;
|
||||
+ }
|
||||
+ g_source_destroy(&iwp->parent);
|
||||
+}
|
||||
+
|
||||
void remove_fd_in_watch(Chardev *chr)
|
||||
{
|
||||
if (chr->gsource) {
|
||||
- g_source_destroy(chr->gsource);
|
||||
+ io_remove_watch_poll(chr->gsource);
|
||||
chr->gsource = NULL;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,67 @@
|
||||
From d4e6f7105b00ba2536d5d733b7c03116f28ce116 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:06:21 -0400
|
||||
Subject: [PATCH 2/5] Revert "monitor: use aio_co_reschedule_self()"
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()"
|
||||
RH-Jira: RHEL-34618 RHEL-38697
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/2] b6a2ebd4a69dbcd2bd56c61e7c747f8f8f42337e (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Commit 1f25c172f837 ("monitor: use aio_co_reschedule_self()") was a code
|
||||
cleanup that uses aio_co_reschedule_self() instead of open coding
|
||||
coroutine rescheduling.
|
||||
|
||||
Bug RHEL-34618 was reported and Kevin Wolf <kwolf@redhat.com> identified
|
||||
the root cause. I missed that aio_co_reschedule_self() ->
|
||||
qemu_get_current_aio_context() only knows about
|
||||
qemu_aio_context/IOThread AioContexts and not about iohandler_ctx. It
|
||||
does not function correctly when going back from the iohandler_ctx to
|
||||
qemu_aio_context.
|
||||
|
||||
Go back to open coding the AioContext transitions to avoid this bug.
|
||||
|
||||
This reverts commit 1f25c172f83704e350c0829438d832384084a74d.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-34618
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240506190622.56095-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 719c6819ed9a9838520fa732f9861918dc693bda)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
qapi/qmp-dispatch.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
|
||||
index f3488afeef..176b549473 100644
|
||||
--- a/qapi/qmp-dispatch.c
|
||||
+++ b/qapi/qmp-dispatch.c
|
||||
@@ -212,7 +212,8 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
|
||||
* executing the command handler so that it can make progress if it
|
||||
* involves an AIO_WAIT_WHILE().
|
||||
*/
|
||||
- aio_co_reschedule_self(qemu_get_aio_context());
|
||||
+ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
|
||||
+ qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
monitor_set_cur(qemu_coroutine_self(), cur_mon);
|
||||
@@ -226,7 +227,9 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
|
||||
* Move back to iohandler_ctx so that nested event loops for
|
||||
* qemu_aio_context don't start new monitor commands.
|
||||
*/
|
||||
- aio_co_reschedule_self(iohandler_get_aio_context());
|
||||
+ aio_co_schedule(iohandler_get_aio_context(),
|
||||
+ qemu_coroutine_self());
|
||||
+ qemu_coroutine_yield();
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,38 @@
|
||||
From bcbc897cb19b3a6523de611f48f6bac6cea16c97 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Ott <sebott@redhat.com>
|
||||
Date: Thu, 2 May 2024 13:17:03 +0200
|
||||
Subject: [PATCH 2/2] Revert "x86: rhel 9.4.0 machine type compat fix"
|
||||
|
||||
RH-Author: Sebastian Ott <sebott@redhat.com>
|
||||
RH-MergeRequest: 237: Revert "x86: rhel 9.4.0 machine type compat fix"
|
||||
RH-Jira: RHEL-30362
|
||||
RH-Acked-by: Ani Sinha <anisinha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] 858ec153e65e96c39ca4db17ed93fd58c77dc2eb (seott1/cos-qemu-kvm)
|
||||
|
||||
This reverts commit c46e44f0f4e861fe412ce679b0b0204881c1c2f5.
|
||||
|
||||
pc-q35-rhel9.4.0 and newer should stay with SMBIOS_ENTRY_POINT_TYPE_AUTO.
|
||||
|
||||
Signed-off-by: Sebastian Ott <sebott@redhat.com>
|
||||
---
|
||||
hw/i386/pc_q35.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index 2f11f9af7d..2b54944c0f 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -734,9 +734,6 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
|
||||
- /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */
|
||||
- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
|
||||
-
|
||||
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
|
||||
hw_compat_rhel_9_5_len);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,60 +0,0 @@
|
||||
From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:01 -0500
|
||||
Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release()
|
||||
a no-op
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
aio_context_acquire()/aio_context_release() has been replaced by
|
||||
fine-grained locking to protect state shared by multiple threads. The
|
||||
AioContext lock still plays the role of balancing locking in
|
||||
AIO_WAIT_WHILE() and many functions in QEMU either require that the
|
||||
AioContext lock is held or not held for this reason. In other words, the
|
||||
AioContext lock is purely there for consistency with itself and serves
|
||||
no real purpose anymore.
|
||||
|
||||
Stop actually acquiring/releasing the lock in
|
||||
aio_context_acquire()/aio_context_release() so that subsequent patches
|
||||
can remove callers across the codebase incrementally.
|
||||
|
||||
I have performed "make check" and qemu-iotests stress tests across
|
||||
x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
|
||||
result of eliminating the lock.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
util/async.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 8f90ddc304..04ee83d220 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx)
|
||||
|
||||
void aio_context_acquire(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_lock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
void aio_context_release(AioContext *ctx)
|
||||
{
|
||||
- qemu_rec_mutex_unlock(&ctx->lock);
|
||||
+ /* TODO remove this function */
|
||||
}
|
||||
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,102 +0,0 @@
|
||||
From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:07 -0500
|
||||
Subject: [PATCH 090/101] aio: remove
|
||||
aio_context_acquire()/aio_context_release() API
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Delete these functions because nothing calls these functions anymore.
|
||||
|
||||
I introduced these APIs in commit 98563fc3ec44 ("aio: add
|
||||
aio_context_acquire() and aio_context_release()") in 2014. It's with a
|
||||
sigh of relief that I delete these APIs almost 10 years later.
|
||||
|
||||
Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an
|
||||
understanding of where the code needed to go in order to remove the
|
||||
limitations that the original dataplane and the IOThread/AioContext
|
||||
approach that followed it.
|
||||
|
||||
Emanuele Giuseppe Esposito had the splendid determination to convert
|
||||
large parts of the codebase so that they no longer needed the AioContext
|
||||
lock. This was a painstaking process, both in the actual code changes
|
||||
required and the iterations of code review that Emanuele eked out of
|
||||
Kevin and me over many months.
|
||||
|
||||
Kevin Wolf tackled multitudes of graph locking conversions to protect
|
||||
in-flight I/O from run-time changes to the block graph as well as the
|
||||
clang Thread Safety Analysis annotations that allow the compiler to
|
||||
check whether the graph lock is being used correctly.
|
||||
|
||||
And me, well, I'm just here to add some pizzazz to the QEMU multi-queue
|
||||
block layer :). Thank you to everyone who helped with this effort,
|
||||
including Eric Blake, code reviewer extraordinaire, and others who I've
|
||||
forgotten to mention.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-11-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio.h | 17 -----------------
|
||||
util/async.c | 10 ----------
|
||||
2 files changed, 27 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index f08b358077..af05512a7d 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx);
|
||||
*/
|
||||
void aio_context_unref(AioContext *ctx);
|
||||
|
||||
-/* Take ownership of the AioContext. If the AioContext will be shared between
|
||||
- * threads, and a thread does not want to be interrupted, it will have to
|
||||
- * take ownership around calls to aio_poll(). Otherwise, aio_poll()
|
||||
- * automatically takes care of calling aio_context_acquire and
|
||||
- * aio_context_release.
|
||||
- *
|
||||
- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
|
||||
- * thread still has to call those to avoid being interrupted by the guest.
|
||||
- *
|
||||
- * Bottom halves, timers and callbacks can be created or removed without
|
||||
- * acquiring the AioContext.
|
||||
- */
|
||||
-void aio_context_acquire(AioContext *ctx);
|
||||
-
|
||||
-/* Relinquish ownership of the AioContext. */
|
||||
-void aio_context_release(AioContext *ctx);
|
||||
-
|
||||
/**
|
||||
* aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
|
||||
* run only once and as soon as possible.
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index dfd44ef612..460529057c 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx)
|
||||
g_source_unref(&ctx->source);
|
||||
}
|
||||
|
||||
-void aio_context_acquire(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
-void aio_context_release(AioContext *ctx)
|
||||
-{
|
||||
- /* TODO remove this function */
|
||||
-}
|
||||
-
|
||||
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||
|
||||
AioContext *qemu_get_current_aio_context(void)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,81 +0,0 @@
|
||||
From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:06 -0500
|
||||
Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE()
|
||||
and AIO_WAIT_WHILE_UNLOCKED()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and
|
||||
AIO_WAIT_WHILE_UNLOCKED() are equivalent.
|
||||
|
||||
A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED().
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-10-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 16 ++++------------
|
||||
1 file changed, 4 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index 5449b6d742..157f105916 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -63,9 +63,6 @@ extern AioWait global_aio_wait;
|
||||
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
|
||||
* caller does not hold a lock) are involved in the polling condition.
|
||||
* @cond: wait while this conditional expression is true
|
||||
- * @unlock: whether to unlock and then lock again @ctx. This applies
|
||||
- * only when waiting for another AioContext from the main loop.
|
||||
- * Otherwise it's ignored.
|
||||
*
|
||||
* Wait while a condition is true. Use this to implement synchronous
|
||||
* operations that require event loop activity.
|
||||
@@ -78,7 +75,7 @@ extern AioWait global_aio_wait;
|
||||
* wait on conditions between two IOThreads since that could lead to deadlock,
|
||||
* go via the main loop instead.
|
||||
*/
|
||||
-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \
|
||||
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \
|
||||
bool waited_ = false; \
|
||||
AioWait *wait_ = &global_aio_wait; \
|
||||
AioContext *ctx_ = (ctx); \
|
||||
@@ -95,13 +92,7 @@ extern AioWait global_aio_wait;
|
||||
assert(qemu_get_current_aio_context() == \
|
||||
qemu_get_aio_context()); \
|
||||
while ((cond)) { \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_release(ctx_); \
|
||||
- } \
|
||||
aio_poll(qemu_get_aio_context(), true); \
|
||||
- if (unlock && ctx_) { \
|
||||
- aio_context_acquire(ctx_); \
|
||||
- } \
|
||||
waited_ = true; \
|
||||
} \
|
||||
} \
|
||||
@@ -109,10 +100,11 @@ extern AioWait global_aio_wait;
|
||||
waited_; })
|
||||
|
||||
#define AIO_WAIT_WHILE(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */
|
||||
#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \
|
||||
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
|
||||
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||
|
||||
/**
|
||||
* aio_wait_kick:
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,64 @@
|
||||
From 0e3934e89ad1dda21681f64ff38da69b07d1b531 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:06:22 -0400
|
||||
Subject: [PATCH 3/5] aio: warn about iohandler_ctx special casing
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()"
|
||||
RH-Jira: RHEL-34618 RHEL-38697
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/2] cc316d70b2c187ee0412d6560ca1a03e381a69c1 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The main loop has two AioContexts: qemu_aio_context and iohandler_ctx.
|
||||
The main loop runs them both, but nested aio_poll() calls on
|
||||
qemu_aio_context exclude iohandler_ctx.
|
||||
|
||||
Which one should qemu_get_current_aio_context() return when called from
|
||||
the main loop? Document that it's always qemu_aio_context.
|
||||
|
||||
This has subtle effects on functions that use
|
||||
qemu_get_current_aio_context(). For example, aio_co_reschedule_self()
|
||||
does not work when moving from iohandler_ctx to qemu_aio_context because
|
||||
qemu_get_current_aio_context() does not differentiate these two
|
||||
AioContexts.
|
||||
|
||||
Document this in order to reduce the chance of future bugs.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240506190622.56095-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit e669e800fc9ef8806af5c5578249ab758a4f8a5a)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
include/block/aio.h | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 8378553eb9..4ee81936ed 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -629,6 +629,9 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co);
|
||||
*
|
||||
* Move the currently running coroutine to new_ctx. If the coroutine is already
|
||||
* running in new_ctx, do nothing.
|
||||
+ *
|
||||
+ * Note that this function cannot reschedule from iohandler_ctx to
|
||||
+ * qemu_aio_context.
|
||||
*/
|
||||
void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
|
||||
|
||||
@@ -661,6 +664,9 @@ void aio_co_enter(AioContext *ctx, Coroutine *co);
|
||||
* If called from an IOThread this will be the IOThread's AioContext. If
|
||||
* called from the main thread or with the "big QEMU lock" taken it
|
||||
* will be the main loop AioContext.
|
||||
+ *
|
||||
+ * Note that the return value is never the main loop's iohandler_ctx and the
|
||||
+ * return value is the main loop AioContext instead.
|
||||
*/
|
||||
AioContext *qemu_get_current_aio_context(void);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,476 +0,0 @@
|
||||
From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:00 +0800
|
||||
Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm)
|
||||
|
||||
Introduce an iommufd object which allows the interaction
|
||||
with the host /dev/iommu device.
|
||||
|
||||
The /dev/iommu can have been already pre-opened outside of qemu,
|
||||
in which case the fd can be passed directly along with the
|
||||
iommufd object:
|
||||
|
||||
This allows the iommufd object to be shared accross several
|
||||
subsystems (VFIO, VDPA, ...). For example, libvirt would open
|
||||
the /dev/iommu once.
|
||||
|
||||
If no fd is passed along with the iommufd object, the /dev/iommu
|
||||
is opened by the qemu code.
|
||||
|
||||
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 8 ++
|
||||
backends/Kconfig | 4 +
|
||||
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
|
||||
backends/meson.build | 1 +
|
||||
backends/trace-events | 10 ++
|
||||
include/sysemu/iommufd.h | 38 ++++++
|
||||
qapi/qom.json | 19 +++
|
||||
qemu-options.hx | 12 ++
|
||||
8 files changed, 337 insertions(+)
|
||||
create mode 100644 backends/iommufd.c
|
||||
create mode 100644 include/sysemu/iommufd.h
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index 695e0bd34f..a5a446914a 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
|
||||
F: docs/system/s390x/vfio-ap.rst
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
+iommufd
|
||||
+M: Yi Liu <yi.l.liu@intel.com>
|
||||
+M: Eric Auger <eric.auger@redhat.com>
|
||||
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
+S: Supported
|
||||
+F: backends/iommufd.c
|
||||
+F: include/sysemu/iommufd.h
|
||||
+
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
S: Supported
|
||||
diff --git a/backends/Kconfig b/backends/Kconfig
|
||||
index f35abc1609..2cb23f62fa 100644
|
||||
--- a/backends/Kconfig
|
||||
+++ b/backends/Kconfig
|
||||
@@ -1 +1,5 @@
|
||||
source tpm/Kconfig
|
||||
+
|
||||
+config IOMMUFD
|
||||
+ bool
|
||||
+ depends on VFIO
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
new file mode 100644
|
||||
index 0000000000..ba58a0eb0d
|
||||
--- /dev/null
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -0,0 +1,245 @@
|
||||
+/*
|
||||
+ * iommufd container backend
|
||||
+ *
|
||||
+ * Copyright (C) 2023 Intel Corporation.
|
||||
+ * Copyright Red Hat, Inc. 2023
|
||||
+ *
|
||||
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
+ * Eric Auger <eric.auger@redhat.com>
|
||||
+ *
|
||||
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/iommufd.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "qapi/qmp/qerror.h"
|
||||
+#include "qemu/module.h"
|
||||
+#include "qom/object_interfaces.h"
|
||||
+#include "qemu/error-report.h"
|
||||
+#include "monitor/monitor.h"
|
||||
+#include "trace.h"
|
||||
+#include <sys/ioctl.h>
|
||||
+#include <linux/iommufd.h>
|
||||
+
|
||||
+static void iommufd_backend_init(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ be->fd = -1;
|
||||
+ be->users = 0;
|
||||
+ be->owned = true;
|
||||
+ qemu_mutex_init(&be->lock);
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_finalize(Object *obj)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+
|
||||
+ if (be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
+ int fd = -1;
|
||||
+
|
||||
+ fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||
+ if (fd == -1) {
|
||||
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
+ return;
|
||||
+ }
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ be->fd = fd;
|
||||
+ be->owned = false;
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ trace_iommu_backend_set_fd(be->fd);
|
||||
+}
|
||||
+
|
||||
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
|
||||
+{
|
||||
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
|
||||
+
|
||||
+ return !be->users;
|
||||
+}
|
||||
+
|
||||
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
|
||||
+{
|
||||
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||
+
|
||||
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
|
||||
+
|
||||
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
+{
|
||||
+ int fd, ret = 0;
|
||||
+
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (be->users == UINT32_MAX) {
|
||||
+ error_setg(errp, "too many connections");
|
||||
+ ret = -E2BIG;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ if (be->owned && !be->users) {
|
||||
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
|
||||
+ ret = fd;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->fd = fd;
|
||||
+ }
|
||||
+ be->users++;
|
||||
+out:
|
||||
+ trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
+ be->users, ret);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
+{
|
||||
+ qemu_mutex_lock(&be->lock);
|
||||
+ if (!be->users) {
|
||||
+ goto out;
|
||||
+ }
|
||||
+ be->users--;
|
||||
+ if (!be->users && be->owned) {
|
||||
+ close(be->fd);
|
||||
+ be->fd = -1;
|
||||
+ }
|
||||
+out:
|
||||
+ trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
+ qemu_mutex_unlock(&be->lock);
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_alloc alloc_data = {
|
||||
+ .size = sizeof(alloc_data),
|
||||
+ .flags = 0,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
|
||||
+ if (ret) {
|
||||
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ *ioas_id = alloc_data.out_ioas_id;
|
||||
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_destroy des = {
|
||||
+ .size = sizeof(des),
|
||||
+ .id = id,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
|
||||
+ trace_iommufd_backend_free_id(fd, id, ret);
|
||||
+ if (ret) {
|
||||
+ error_report("Failed to free id: %u %m", id);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_map map = {
|
||||
+ .size = sizeof(map),
|
||||
+ .flags = IOMMU_IOAS_MAP_READABLE |
|
||||
+ IOMMU_IOAS_MAP_FIXED_IOVA,
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .__reserved = 0,
|
||||
+ .user_va = (uintptr_t)vaddr,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ if (!readonly) {
|
||||
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
|
||||
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
|
||||
+ vaddr, readonly, ret);
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+
|
||||
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
|
||||
+ if (errno == EFAULT) {
|
||||
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
|
||||
+ } else {
|
||||
+ error_report("IOMMU_IOAS_MAP failed: %m");
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size)
|
||||
+{
|
||||
+ int ret, fd = be->fd;
|
||||
+ struct iommu_ioas_unmap unmap = {
|
||||
+ .size = sizeof(unmap),
|
||||
+ .ioas_id = ioas_id,
|
||||
+ .iova = iova,
|
||||
+ .length = size,
|
||||
+ };
|
||||
+
|
||||
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
|
||||
+ /*
|
||||
+ * IOMMUFD takes mapping as some kind of object, unmapping
|
||||
+ * nonexistent mapping is treated as deleting a nonexistent
|
||||
+ * object and return ENOENT. This is different from legacy
|
||||
+ * backend which allows it. vIOMMU may trigger a lot of
|
||||
+ * redundant unmapping, to avoid flush the log, treat them
|
||||
+ * as succeess for IOMMUFD just like legacy backend.
|
||||
+ */
|
||||
+ if (ret && errno == ENOENT) {
|
||||
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
|
||||
+ ret = 0;
|
||||
+ } else {
|
||||
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
|
||||
+ }
|
||||
+
|
||||
+ if (ret) {
|
||||
+ ret = -errno;
|
||||
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static const TypeInfo iommufd_backend_info = {
|
||||
+ .name = TYPE_IOMMUFD_BACKEND,
|
||||
+ .parent = TYPE_OBJECT,
|
||||
+ .instance_size = sizeof(IOMMUFDBackend),
|
||||
+ .instance_init = iommufd_backend_init,
|
||||
+ .instance_finalize = iommufd_backend_finalize,
|
||||
+ .class_size = sizeof(IOMMUFDBackendClass),
|
||||
+ .class_init = iommufd_backend_class_init,
|
||||
+ .interfaces = (InterfaceInfo[]) {
|
||||
+ { TYPE_USER_CREATABLE },
|
||||
+ { }
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static void register_types(void)
|
||||
+{
|
||||
+ type_register_static(&iommufd_backend_info);
|
||||
+}
|
||||
+
|
||||
+type_init(register_types);
|
||||
diff --git a/backends/meson.build b/backends/meson.build
|
||||
index 914c7c4afb..9a5cea480d 100644
|
||||
--- a/backends/meson.build
|
||||
+++ b/backends/meson.build
|
||||
@@ -20,6 +20,7 @@ if have_vhost_user
|
||||
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
|
||||
endif
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
|
||||
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
|
||||
if have_vhost_user_crypto
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
|
||||
endif
|
||||
diff --git a/backends/trace-events b/backends/trace-events
|
||||
index 652eb76a57..d45c6e31a6 100644
|
||||
--- a/backends/trace-events
|
||||
+++ b/backends/trace-events
|
||||
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
|
||||
dbus_vmstate_post_load(int version_id) "version_id: %d"
|
||||
dbus_vmstate_loading(const char *id) "id: %s"
|
||||
dbus_vmstate_saving(const char *id) "id: %s"
|
||||
+
|
||||
+# iommufd.c
|
||||
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
|
||||
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
|
||||
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
|
||||
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
|
||||
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
|
||||
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
new file mode 100644
|
||||
index 0000000000..9c5524b0ed
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -0,0 +1,38 @@
|
||||
+#ifndef SYSEMU_IOMMUFD_H
|
||||
+#define SYSEMU_IOMMUFD_H
|
||||
+
|
||||
+#include "qom/object.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "exec/hwaddr.h"
|
||||
+#include "exec/cpu-common.h"
|
||||
+
|
||||
+#define TYPE_IOMMUFD_BACKEND "iommufd"
|
||||
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
|
||||
+
|
||||
+struct IOMMUFDBackendClass {
|
||||
+ ObjectClass parent_class;
|
||||
+};
|
||||
+
|
||||
+struct IOMMUFDBackend {
|
||||
+ Object parent;
|
||||
+
|
||||
+ /*< protected >*/
|
||||
+ int fd; /* /dev/iommu file descriptor */
|
||||
+ bool owned; /* is the /dev/iommu opened internally */
|
||||
+ QemuMutex lock;
|
||||
+ uint32_t users;
|
||||
+
|
||||
+ /*< public >*/
|
||||
+};
|
||||
+
|
||||
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
|
||||
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
|
||||
+
|
||||
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
+ Error **errp);
|
||||
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
|
||||
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
+ ram_addr_t size, void *vaddr, bool readonly);
|
||||
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
+ hwaddr iova, ram_addr_t size);
|
||||
+#endif
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index c53ef978ff..95516ba325 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -794,6 +794,23 @@
|
||||
{ 'struct': 'VfioUserServerProperties',
|
||||
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
|
||||
|
||||
+##
|
||||
+# @IOMMUFDProperties:
|
||||
+#
|
||||
+# Properties for iommufd objects.
|
||||
+#
|
||||
+# @fd: file descriptor name previously passed via 'getfd' command,
|
||||
+# which represents a pre-opened /dev/iommu. This allows the
|
||||
+# iommufd object to be shared accross several subsystems
|
||||
+# (VFIO, VDPA, ...), and the file descriptor to be shared
|
||||
+# with other process, e.g. DPDK. (default: QEMU opens
|
||||
+# /dev/iommu by itself)
|
||||
+#
|
||||
+# Since: 9.0
|
||||
+##
|
||||
+{ 'struct': 'IOMMUFDProperties',
|
||||
+ 'data': { '*fd': 'str' } }
|
||||
+
|
||||
##
|
||||
# @RngProperties:
|
||||
#
|
||||
@@ -934,6 +951,7 @@
|
||||
'input-barrier',
|
||||
{ 'name': 'input-linux',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd',
|
||||
'iothread',
|
||||
'main-loop',
|
||||
{ 'name': 'memory-backend-epc',
|
||||
@@ -1003,6 +1021,7 @@
|
||||
'input-barrier': 'InputBarrierProperties',
|
||||
'input-linux': { 'type': 'InputLinuxProperties',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
+ 'iommufd': 'IOMMUFDProperties',
|
||||
'iothread': 'IothreadProperties',
|
||||
'main-loop': 'MainLoopProperties',
|
||||
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 557118cb1f..0814f43066 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -5224,6 +5224,18 @@ SRST
|
||||
|
||||
The ``share`` boolean option is on by default with memfd.
|
||||
|
||||
+ ``-object iommufd,id=id[,fd=fd]``
|
||||
+ Creates an iommufd backend which allows control of DMA mapping
|
||||
+ through the ``/dev/iommu`` device.
|
||||
+
|
||||
+ The ``id`` parameter is a unique ID which frontends (such as
|
||||
+ vfio-pci of vdpa) will use to connect with the iommufd backend.
|
||||
+
|
||||
+ The ``fd`` parameter is an optional pre-opened file descriptor
|
||||
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
|
||||
+ across all subsystems, bringing the benefit of centralized
|
||||
+ reference counting.
|
||||
+
|
||||
``-object rng-builtin,id=id``
|
||||
Creates a random number generator backend which obtains entropy
|
||||
from QEMU builtin functions. The ``id`` parameter is a unique ID
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,47 +0,0 @@
|
||||
From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Fri, 22 Dec 2023 08:55:23 +0100
|
||||
Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend
|
||||
users
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm)
|
||||
|
||||
QOM already has a ref count on objects and it will assert much
|
||||
earlier, when INT_MAX is reached.
|
||||
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 5 -----
|
||||
1 file changed, 5 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index ba58a0eb0d..393c0d9a37 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
int fd, ret = 0;
|
||||
|
||||
qemu_mutex_lock(&be->lock);
|
||||
- if (be->users == UINT32_MAX) {
|
||||
- error_setg(errp, "too many connections");
|
||||
- ret = -E2BIG;
|
||||
- goto out;
|
||||
- }
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,112 +0,0 @@
|
||||
From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Thu, 21 Dec 2023 16:58:41 +0100
|
||||
Subject: [PATCH 065/101] backends/iommufd: Remove mutex
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Coverity reports a concurrent data access violation because be->users
|
||||
is being accessed in iommufd_backend_can_be_deleted() without holding
|
||||
the mutex.
|
||||
|
||||
However, these routines are called from the QEMU main thread when a
|
||||
device is created. In this case, the code paths should be protected by
|
||||
the BQL lock and it should be safe to drop the IOMMUFD backend mutex.
|
||||
Simply remove it.
|
||||
|
||||
Fixes: CID 1531550
|
||||
Fixes: CID 1531549
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
backends/iommufd.c | 7 -------
|
||||
include/sysemu/iommufd.h | 2 --
|
||||
2 files changed, 9 deletions(-)
|
||||
|
||||
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||
index 393c0d9a37..1ef683c7b0 100644
|
||||
--- a/backends/iommufd.c
|
||||
+++ b/backends/iommufd.c
|
||||
@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj)
|
||||
be->fd = -1;
|
||||
be->users = 0;
|
||||
be->owned = true;
|
||||
- qemu_mutex_init(&be->lock);
|
||||
}
|
||||
|
||||
static void iommufd_backend_finalize(Object *obj)
|
||||
@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
return;
|
||||
}
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
be->fd = fd;
|
||||
be->owned = false;
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
trace_iommu_backend_set_fd(be->fd);
|
||||
}
|
||||
|
||||
@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
{
|
||||
int fd, ret = 0;
|
||||
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
out:
|
||||
trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
be->users, ret);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
{
|
||||
- qemu_mutex_lock(&be->lock);
|
||||
if (!be->users) {
|
||||
goto out;
|
||||
}
|
||||
@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
}
|
||||
out:
|
||||
trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
- qemu_mutex_unlock(&be->lock);
|
||||
}
|
||||
|
||||
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||
index 9c5524b0ed..9af27ebd6c 100644
|
||||
--- a/include/sysemu/iommufd.h
|
||||
+++ b/include/sysemu/iommufd.h
|
||||
@@ -2,7 +2,6 @@
|
||||
#define SYSEMU_IOMMUFD_H
|
||||
|
||||
#include "qom/object.h"
|
||||
-#include "qemu/thread.h"
|
||||
#include "exec/hwaddr.h"
|
||||
#include "exec/cpu-common.h"
|
||||
|
||||
@@ -19,7 +18,6 @@ struct IOMMUFDBackend {
|
||||
/*< protected >*/
|
||||
int fd; /* /dev/iommu file descriptor */
|
||||
bool owned; /* is the /dev/iommu opened internally */
|
||||
- QemuMutex lock;
|
||||
uint32_t users;
|
||||
|
||||
/*< public >*/
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,104 +0,0 @@
|
||||
From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 2 Feb 2024 15:47:54 +0100
|
||||
Subject: [PATCH 03/22] block-backend: Allow concurrent context changes
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 222: Allow concurrent BlockBackend context changes
|
||||
RH-Jira: RHEL-24593
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s)
|
||||
|
||||
Since AioContext locks have been removed, a BlockBackend's AioContext
|
||||
may really change at any time (only exception is that it is often
|
||||
confined to a drained section, as noted in this patch). Therefore,
|
||||
blk_get_aio_context() cannot rely on its root node's context always
|
||||
matching that of the BlockBackend.
|
||||
|
||||
In practice, whether they match does not matter anymore anyway: Requests
|
||||
can be sent to BDSs from any context, so anyone who requests the BB's
|
||||
context should have no reason to require the root node to have the same
|
||||
context. Therefore, we can and should remove the assertion to that
|
||||
effect.
|
||||
|
||||
In addition, because the context can be set and queried from different
|
||||
threads concurrently, it has to be accessed with atomic operations.
|
||||
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-19381
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-ID: <20240202144755.671354-2-hreitz@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111)
|
||||
---
|
||||
block/block-backend.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index 209eb07528..9c4de79e6b 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -44,7 +44,7 @@ struct BlockBackend {
|
||||
char *name;
|
||||
int refcnt;
|
||||
BdrvChild *root;
|
||||
- AioContext *ctx;
|
||||
+ AioContext *ctx; /* access with atomic operations only */
|
||||
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
|
||||
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
|
||||
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
|
||||
@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
|
||||
}
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Return BB's current AioContext. Note that this context may change
|
||||
+ * concurrently at any time, with one exception: If the BB has a root node
|
||||
+ * attached, its context will only change through bdrv_try_change_aio_context(),
|
||||
+ * which creates a drained section. Therefore, incrementing such a BB's
|
||||
+ * in-flight counter will prevent its context from changing.
|
||||
+ */
|
||||
AioContext *blk_get_aio_context(BlockBackend *blk)
|
||||
{
|
||||
- BlockDriverState *bs;
|
||||
IO_CODE();
|
||||
|
||||
if (!blk) {
|
||||
return qemu_get_aio_context();
|
||||
}
|
||||
|
||||
- bs = blk_bs(blk);
|
||||
- if (bs) {
|
||||
- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
|
||||
- assert(ctx == blk->ctx);
|
||||
- }
|
||||
-
|
||||
- return blk->ctx;
|
||||
+ return qatomic_read(&blk->ctx);
|
||||
}
|
||||
|
||||
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
if (!bs) {
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
|
||||
AioContext *new_context = s->new_ctx;
|
||||
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
|
||||
|
||||
- blk->ctx = new_context;
|
||||
+ qatomic_set(&blk->ctx, new_context);
|
||||
if (tgm->throttle_state) {
|
||||
throttle_group_detach_aio_context(tgm);
|
||||
throttle_group_attach_aio_context(tgm, new_context);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,69 +0,0 @@
|
||||
From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 12 Sep 2023 19:10:37 -0400
|
||||
Subject: [PATCH 095/101] block-coroutine-wrapper: use
|
||||
qemu_get_current_aio_context()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Use qemu_get_current_aio_context() in mixed wrappers and coroutine
|
||||
wrappers so that code runs in the caller's AioContext instead of moving
|
||||
to the BlockDriverState's AioContext. This change is necessary for the
|
||||
multi-queue block layer where any thread can call into the block layer.
|
||||
|
||||
Most wrappers are IO_CODE where it's safe to use the current AioContext
|
||||
nowadays. BlockDrivers and the core block layer use their own locks and
|
||||
no longer depend on the AioContext lock for thread-safety.
|
||||
|
||||
The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current
|
||||
AioContext is safe because this code is only called with the BQL held
|
||||
from the main loop thread.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230912231037.826804-6-stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
scripts/block-coroutine-wrapper.py | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
|
||||
index c9c09fcacd..dbbde99e39 100644
|
||||
--- a/scripts/block-coroutine-wrapper.py
|
||||
+++ b/scripts/block-coroutine-wrapper.py
|
||||
@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
|
||||
f"{self.name}")
|
||||
self.target_name = f'{subsystem}_{subname}'
|
||||
|
||||
- self.ctx = self.gen_ctx()
|
||||
-
|
||||
self.get_result = 's->ret = '
|
||||
self.ret = 'return s.ret;'
|
||||
self.co_ret = 'return '
|
||||
@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
|
||||
{func.co_ret}{name}({ func.gen_list('{name}') });
|
||||
}} else {{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
|
||||
{func.return_type} {func.name}({ func.gen_list('{decl}') })
|
||||
{{
|
||||
{struct_name} s = {{
|
||||
- .poll_state.ctx = {func.ctx},
|
||||
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||
.poll_state.in_progress = true,
|
||||
|
||||
{ func.gen_block(' .{name} = {name},') }
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,330 @@
|
||||
From a67edfb4b591acdffc5b4987601a30224376996f Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 27 May 2024 11:58:50 -0400
|
||||
Subject: [PATCH 4/5] block/crypto: create ciphers on demand
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 251: block/crypto: create ciphers on demand
|
||||
RH-Jira: RHEL-36159
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/2] 22a4c87fef774cad98a6f5a79f27df50a208013d (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Ciphers are pre-allocated by qcrypto_block_init_cipher() depending on
|
||||
the given number of threads. The -device
|
||||
virtio-blk-pci,iothread-vq-mapping= feature allows users to assign
|
||||
multiple IOThreads to a virtio-blk device, but the association between
|
||||
the virtio-blk device and the block driver happens after the block
|
||||
driver is already open.
|
||||
|
||||
When the number of threads given to qcrypto_block_init_cipher() is
|
||||
smaller than the actual number of threads at runtime, the
|
||||
block->n_free_ciphers > 0 assertion in qcrypto_block_pop_cipher() can
|
||||
fail.
|
||||
|
||||
Get rid of qcrypto_block_init_cipher() n_thread's argument and allocate
|
||||
ciphers on demand.
|
||||
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-36159
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240527155851.892885-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit af206c284e4c1b17cdfb0f17e898b288c0fc1751)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
crypto/block-luks.c | 3 +-
|
||||
crypto/block-qcow.c | 2 +-
|
||||
crypto/block.c | 111 ++++++++++++++++++++++++++------------------
|
||||
crypto/blockpriv.h | 12 +++--
|
||||
4 files changed, 78 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
|
||||
index 3ee928fb5a..3357852c0a 100644
|
||||
--- a/crypto/block-luks.c
|
||||
+++ b/crypto/block-luks.c
|
||||
@@ -1262,7 +1262,6 @@ qcrypto_block_luks_open(QCryptoBlock *block,
|
||||
luks->cipher_mode,
|
||||
masterkey,
|
||||
luks->header.master_key_len,
|
||||
- n_threads,
|
||||
errp) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1456,7 +1455,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
|
||||
/* Setup the block device payload encryption objects */
|
||||
if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg,
|
||||
luks_opts.cipher_mode, masterkey,
|
||||
- luks->header.master_key_len, 1, errp) < 0) {
|
||||
+ luks->header.master_key_len, errp) < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
|
||||
index 4d7cf36a8f..02305058e3 100644
|
||||
--- a/crypto/block-qcow.c
|
||||
+++ b/crypto/block-qcow.c
|
||||
@@ -75,7 +75,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
|
||||
ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128,
|
||||
QCRYPTO_CIPHER_MODE_CBC,
|
||||
keybuf, G_N_ELEMENTS(keybuf),
|
||||
- n_threads, errp);
|
||||
+ errp);
|
||||
if (ret < 0) {
|
||||
ret = -ENOTSUP;
|
||||
goto fail;
|
||||
diff --git a/crypto/block.c b/crypto/block.c
|
||||
index 506ea1d1a3..ba6d1cebc7 100644
|
||||
--- a/crypto/block.c
|
||||
+++ b/crypto/block.c
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
+#include "qemu/lockable.h"
|
||||
#include "blockpriv.h"
|
||||
#include "block-qcow.h"
|
||||
#include "block-luks.h"
|
||||
@@ -57,6 +58,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
|
||||
+ qemu_mutex_init(&block->mutex);
|
||||
+
|
||||
block->format = options->format;
|
||||
|
||||
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
|
||||
@@ -76,8 +79,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- qemu_mutex_init(&block->mutex);
|
||||
-
|
||||
return block;
|
||||
}
|
||||
|
||||
@@ -92,6 +93,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
|
||||
+ qemu_mutex_init(&block->mutex);
|
||||
+
|
||||
block->format = options->format;
|
||||
|
||||
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
|
||||
@@ -111,8 +114,6 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- qemu_mutex_init(&block->mutex);
|
||||
-
|
||||
return block;
|
||||
}
|
||||
|
||||
@@ -227,37 +228,42 @@ QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block)
|
||||
* This function is used only in test with one thread (it's safe to skip
|
||||
* pop/push interface), so it's enough to assert it here:
|
||||
*/
|
||||
- assert(block->n_ciphers <= 1);
|
||||
- return block->ciphers ? block->ciphers[0] : NULL;
|
||||
+ assert(block->max_free_ciphers <= 1);
|
||||
+ return block->free_ciphers ? block->free_ciphers[0] : NULL;
|
||||
}
|
||||
|
||||
|
||||
-static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block)
|
||||
+static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block,
|
||||
+ Error **errp)
|
||||
{
|
||||
- QCryptoCipher *cipher;
|
||||
-
|
||||
- qemu_mutex_lock(&block->mutex);
|
||||
-
|
||||
- assert(block->n_free_ciphers > 0);
|
||||
- block->n_free_ciphers--;
|
||||
- cipher = block->ciphers[block->n_free_ciphers];
|
||||
-
|
||||
- qemu_mutex_unlock(&block->mutex);
|
||||
+ /* Usually there is a free cipher available */
|
||||
+ WITH_QEMU_LOCK_GUARD(&block->mutex) {
|
||||
+ if (block->n_free_ciphers > 0) {
|
||||
+ block->n_free_ciphers--;
|
||||
+ return block->free_ciphers[block->n_free_ciphers];
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- return cipher;
|
||||
+ /* Otherwise allocate a new cipher */
|
||||
+ return qcrypto_cipher_new(block->alg, block->mode, block->key,
|
||||
+ block->nkey, errp);
|
||||
}
|
||||
|
||||
|
||||
static void qcrypto_block_push_cipher(QCryptoBlock *block,
|
||||
QCryptoCipher *cipher)
|
||||
{
|
||||
- qemu_mutex_lock(&block->mutex);
|
||||
+ QEMU_LOCK_GUARD(&block->mutex);
|
||||
|
||||
- assert(block->n_free_ciphers < block->n_ciphers);
|
||||
- block->ciphers[block->n_free_ciphers] = cipher;
|
||||
- block->n_free_ciphers++;
|
||||
+ if (block->n_free_ciphers == block->max_free_ciphers) {
|
||||
+ block->max_free_ciphers++;
|
||||
+ block->free_ciphers = g_renew(QCryptoCipher *,
|
||||
+ block->free_ciphers,
|
||||
+ block->max_free_ciphers);
|
||||
+ }
|
||||
|
||||
- qemu_mutex_unlock(&block->mutex);
|
||||
+ block->free_ciphers[block->n_free_ciphers] = cipher;
|
||||
+ block->n_free_ciphers++;
|
||||
}
|
||||
|
||||
|
||||
@@ -265,24 +271,31 @@ int qcrypto_block_init_cipher(QCryptoBlock *block,
|
||||
QCryptoCipherAlgorithm alg,
|
||||
QCryptoCipherMode mode,
|
||||
const uint8_t *key, size_t nkey,
|
||||
- size_t n_threads, Error **errp)
|
||||
+ Error **errp)
|
||||
{
|
||||
- size_t i;
|
||||
+ QCryptoCipher *cipher;
|
||||
|
||||
- assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers);
|
||||
+ assert(!block->free_ciphers && !block->max_free_ciphers &&
|
||||
+ !block->n_free_ciphers);
|
||||
|
||||
- block->ciphers = g_new0(QCryptoCipher *, n_threads);
|
||||
+ /* Stash away cipher parameters for qcrypto_block_pop_cipher() */
|
||||
+ block->alg = alg;
|
||||
+ block->mode = mode;
|
||||
+ block->key = g_memdup2(key, nkey);
|
||||
+ block->nkey = nkey;
|
||||
|
||||
- for (i = 0; i < n_threads; i++) {
|
||||
- block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp);
|
||||
- if (!block->ciphers[i]) {
|
||||
- qcrypto_block_free_cipher(block);
|
||||
- return -1;
|
||||
- }
|
||||
- block->n_ciphers++;
|
||||
- block->n_free_ciphers++;
|
||||
+ /*
|
||||
+ * Create a new cipher to validate the parameters now. This reduces the
|
||||
+ * chance of cipher creation failing at I/O time.
|
||||
+ */
|
||||
+ cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ g_free(block->key);
|
||||
+ block->key = NULL;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
+ qcrypto_block_push_cipher(block, cipher);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -291,19 +304,23 @@ void qcrypto_block_free_cipher(QCryptoBlock *block)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
- if (!block->ciphers) {
|
||||
+ g_free(block->key);
|
||||
+ block->key = NULL;
|
||||
+
|
||||
+ if (!block->free_ciphers) {
|
||||
return;
|
||||
}
|
||||
|
||||
- assert(block->n_ciphers == block->n_free_ciphers);
|
||||
+ /* All popped ciphers were eventually pushed back */
|
||||
+ assert(block->n_free_ciphers == block->max_free_ciphers);
|
||||
|
||||
- for (i = 0; i < block->n_ciphers; i++) {
|
||||
- qcrypto_cipher_free(block->ciphers[i]);
|
||||
+ for (i = 0; i < block->max_free_ciphers; i++) {
|
||||
+ qcrypto_cipher_free(block->free_ciphers[i]);
|
||||
}
|
||||
|
||||
- g_free(block->ciphers);
|
||||
- block->ciphers = NULL;
|
||||
- block->n_ciphers = block->n_free_ciphers = 0;
|
||||
+ g_free(block->free_ciphers);
|
||||
+ block->free_ciphers = NULL;
|
||||
+ block->max_free_ciphers = block->n_free_ciphers = 0;
|
||||
}
|
||||
|
||||
QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
|
||||
@@ -311,7 +328,7 @@ QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
|
||||
/* ivgen should be accessed under mutex. However, this function is used only
|
||||
* in test with one thread, so it's enough to assert it here:
|
||||
*/
|
||||
- assert(block->n_ciphers <= 1);
|
||||
+ assert(block->max_free_ciphers <= 1);
|
||||
return block->ivgen;
|
||||
}
|
||||
|
||||
@@ -446,7 +463,10 @@ int qcrypto_block_decrypt_helper(QCryptoBlock *block,
|
||||
Error **errp)
|
||||
{
|
||||
int ret;
|
||||
- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block);
|
||||
+ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen,
|
||||
&block->mutex, sectorsize, offset, buf,
|
||||
@@ -465,7 +485,10 @@ int qcrypto_block_encrypt_helper(QCryptoBlock *block,
|
||||
Error **errp)
|
||||
{
|
||||
int ret;
|
||||
- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block);
|
||||
+ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp);
|
||||
+ if (!cipher) {
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen,
|
||||
&block->mutex, sectorsize, offset, buf,
|
||||
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
|
||||
index 836f3b4726..4bf6043d5d 100644
|
||||
--- a/crypto/blockpriv.h
|
||||
+++ b/crypto/blockpriv.h
|
||||
@@ -32,8 +32,14 @@ struct QCryptoBlock {
|
||||
const QCryptoBlockDriver *driver;
|
||||
void *opaque;
|
||||
|
||||
- QCryptoCipher **ciphers;
|
||||
- size_t n_ciphers;
|
||||
+ /* Cipher parameters */
|
||||
+ QCryptoCipherAlgorithm alg;
|
||||
+ QCryptoCipherMode mode;
|
||||
+ uint8_t *key;
|
||||
+ size_t nkey;
|
||||
+
|
||||
+ QCryptoCipher **free_ciphers;
|
||||
+ size_t max_free_ciphers;
|
||||
size_t n_free_ciphers;
|
||||
QCryptoIVGen *ivgen;
|
||||
QemuMutex mutex;
|
||||
@@ -130,7 +136,7 @@ int qcrypto_block_init_cipher(QCryptoBlock *block,
|
||||
QCryptoCipherAlgorithm alg,
|
||||
QCryptoCipherMode mode,
|
||||
const uint8_t *key, size_t nkey,
|
||||
- size_t n_threads, Error **errp);
|
||||
+ Error **errp);
|
||||
|
||||
void qcrypto_block_free_cipher(QCryptoBlock *block);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,217 +0,0 @@
|
||||
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 14 Sep 2023 10:00:58 -0400
|
||||
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
|
||||
the current thread
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The file-posix block driver currently only sets up Linux AIO and
|
||||
io_uring in the BDS's AioContext. In the multi-queue block layer we must
|
||||
be able to submit I/O requests in AioContexts that do not have Linux AIO
|
||||
and io_uring set up yet since any thread can call into the block driver.
|
||||
|
||||
Set up Linux AIO and io_uring for the current AioContext during request
|
||||
submission. We lose the ability to return an error from
|
||||
.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to
|
||||
resource limits). Instead the user only gets warnings and we fall back
|
||||
to aio=threads. This is still better than a fatal error after startup.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
|
||||
1 file changed, 51 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b862406c71..35684f7e21 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
/* Currently Linux does AIO only for files opened with O_DIRECT */
|
||||
- if (s->use_linux_aio) {
|
||||
- if (!(s->open_flags & O_DIRECT)) {
|
||||
- error_setg(errp, "aio=native was specified, but it requires "
|
||||
- "cache.direct=on, which was not specified.");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use native AIO: ");
|
||||
- goto fail;
|
||||
- }
|
||||
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
|
||||
+ error_setg(errp, "aio=native was specified, but it requires "
|
||||
+ "cache.direct=on, which was not specified.");
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
}
|
||||
#else
|
||||
if (s->use_linux_aio) {
|
||||
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
}
|
||||
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||||
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
|
||||
- error_prepend(errp, "Unable to use io_uring: ");
|
||||
- goto fail;
|
||||
- }
|
||||
- }
|
||||
-#else
|
||||
+#ifndef CONFIG_LINUX_IO_URING
|
||||
if (s->use_linux_io_uring) {
|
||||
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||||
"in this build.");
|
||||
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
return true;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LINUX_IO_URING
|
||||
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_io_uring) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_io_uring = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_LINUX_AIO
|
||||
+static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||||
+{
|
||||
+ Error *local_err = NULL;
|
||||
+ AioContext *ctx;
|
||||
+
|
||||
+ if (!s->use_linux_aio) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ ctx = qemu_get_current_aio_context();
|
||||
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
|
||||
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
|
||||
+ "falling back to thread pool: ");
|
||||
+ s->use_linux_aio = false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||||
type |= QEMU_AIO_MISALIGNED;
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- } else if (s->use_linux_io_uring) {
|
||||
+ } else if (raw_check_linux_io_uring(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
|
||||
goto out;
|
||||
#endif
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
- } else if (s->use_linux_aio) {
|
||||
+ } else if (raw_check_linux_aio(s)) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = laio_co_submit(s->fd, offset, qiov, type,
|
||||
s->aio_max_batch);
|
||||
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||
};
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
+ if (raw_check_linux_io_uring(s)) {
|
||||
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
|
||||
}
|
||||
#endif
|
||||
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
|
||||
}
|
||||
|
||||
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
|
||||
- AioContext *new_context)
|
||||
-{
|
||||
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
|
||||
-#ifdef CONFIG_LINUX_AIO
|
||||
- if (s->use_linux_aio) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_aio(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use native AIO, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_aio = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-#ifdef CONFIG_LINUX_IO_URING
|
||||
- if (s->use_linux_io_uring) {
|
||||
- Error *local_err = NULL;
|
||||
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
|
||||
- error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
- "falling back to thread pool: ");
|
||||
- s->use_linux_io_uring = false;
|
||||
- }
|
||||
- }
|
||||
-#endif
|
||||
-}
|
||||
-
|
||||
static void raw_close(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||
|
||||
.bdrv_co_truncate = raw_co_truncate,
|
||||
.bdrv_co_getlength = raw_co_getlength,
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,97 +0,0 @@
|
||||
From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:04 -0500
|
||||
Subject: [PATCH 087/101] block: remove bdrv_co_lock()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops.
|
||||
Remove them.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-8-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 10 ----------
|
||||
blockdev.c | 5 -----
|
||||
include/block/block-global-state.h | 14 --------------
|
||||
3 files changed, 29 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 91ace5d2d5..434b7f4d72 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
|
||||
bdrv_dec_in_flight(bs);
|
||||
}
|
||||
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
|
||||
-{
|
||||
- /* TODO removed in next patch */
|
||||
-}
|
||||
-
|
||||
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
|
||||
{
|
||||
GLOBAL_STATE_CODE();
|
||||
diff --git a/blockdev.c b/blockdev.c
|
||||
index 5d8b3a23eb..3a5e7222ec 100644
|
||||
--- a/blockdev.c
|
||||
+++ b/blockdev.c
|
||||
@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||
return;
|
||||
}
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_begin(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
|
||||
old_ctx = bdrv_co_enter(bs);
|
||||
blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
|
||||
bdrv_co_leave(bs, old_ctx);
|
||||
|
||||
- bdrv_co_lock(bs);
|
||||
bdrv_drained_end(bs);
|
||||
- bdrv_co_unlock(bs);
|
||||
-
|
||||
blk_co_unref(blk);
|
||||
}
|
||||
|
||||
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||
index 0327f1c605..4ec0b217f0 100644
|
||||
--- a/include/block/block-global-state.h
|
||||
+++ b/include/block/block-global-state.h
|
||||
@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
|
||||
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
|
||||
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
|
||||
|
||||
-/**
|
||||
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
|
||||
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
|
||||
- * know we already own the lock of the current AioContext.
|
||||
- *
|
||||
- * May only be called in the main thread.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
|
||||
-
|
||||
-/**
|
||||
- * Unlocks the AioContext of @bs if it's not the current AioContext.
|
||||
- */
|
||||
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
|
||||
-
|
||||
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
|
||||
GHashTable *visited, Transaction *tran,
|
||||
Error **errp);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,411 +0,0 @@
|
||||
From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:11 -0500
|
||||
Subject: [PATCH 094/101] block: remove outdated AioContext locking comments
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
The AioContext lock no longer exists.
|
||||
|
||||
There is one noteworthy change:
|
||||
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
|
||||
I am not sure whether deadlocks are still possible. Maybe they have just
|
||||
moved to the fine-grained locks that have replaced the AioContext. Since
|
||||
I am not sure if the deadlocks are gone, I have kept the substance
|
||||
unchanged and just removed mention of the AioContext.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-15-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block.c | 73 ++++++----------------------
|
||||
block/block-backend.c | 8 ---
|
||||
block/export/vhost-user-blk-server.c | 4 --
|
||||
include/block/block-common.h | 3 --
|
||||
include/block/block-io.h | 9 ++--
|
||||
include/block/block_int-common.h | 2 -
|
||||
tests/qemu-iotests/202 | 2 +-
|
||||
tests/qemu-iotests/203 | 3 +-
|
||||
8 files changed, 22 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 434b7f4d72..a097772238 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -1616,11 +1616,6 @@ out:
|
||||
g_free(gen_node_name);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * The caller must always hold @bs AioContext lock, because this function calls
|
||||
- * bdrv_refresh_total_sectors() which polls when called from non-coroutine
|
||||
- * context.
|
||||
- */
|
||||
static int no_coroutine_fn GRAPH_UNLOCKED
|
||||
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
|
||||
QDict *options, int open_flags, Error **errp)
|
||||
@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||
* Replaces the node that a BdrvChild points to without updating permissions.
|
||||
*
|
||||
* If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||
- * @child and the caller must hold the AioContext lock for @new_bs.
|
||||
+ * @child.
|
||||
*/
|
||||
static void GRAPH_WRLOCK
|
||||
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
|
||||
@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
|
||||
*
|
||||
* Returns new created child.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*/
|
||||
static BdrvChild * GRAPH_WRLOCK
|
||||
bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||
/*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
|
||||
- * (unless @child_bs is already in @ctx).
|
||||
*/
|
||||
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
|
||||
const char *child_name,
|
||||
@@ -3226,9 +3216,6 @@ out:
|
||||
*
|
||||
* On failure NULL is returned, errp is set and the reference to
|
||||
* child_bs is also dropped.
|
||||
- *
|
||||
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
|
||||
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
|
||||
*/
|
||||
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||
BlockDriverState *child_bs,
|
||||
@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
|
||||
*
|
||||
* Function doesn't update permissions, caller is responsible for this.
|
||||
*
|
||||
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* After calling this function, the transaction @tran may only be completed
|
||||
* while holding a writer lock for the graph.
|
||||
@@ -3513,9 +3499,8 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
- * The caller must hold the AioContext lock for @backing_hd. Both @bs and
|
||||
- * @backing_hd can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * Both @bs and @backing_hd can move to a different AioContext in this
|
||||
+ * function.
|
||||
*
|
||||
* If a backing child is already present (i.e. we're detaching a node), that
|
||||
* child node must be drained.
|
||||
@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
||||
* itself, all options starting with "${bdref_key}." are considered part of the
|
||||
* BlockdevRef.
|
||||
*
|
||||
- * The caller must hold the main AioContext lock.
|
||||
- *
|
||||
* TODO Can this be unified with bdrv_open_image()?
|
||||
*/
|
||||
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||
@@ -3745,9 +3728,7 @@ done:
|
||||
*
|
||||
* The BlockdevRef will be removed from the options QDict.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
BdrvChild *bdrv_open_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
/*
|
||||
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
*
|
||||
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||
- * @parent can move to a different AioContext in this function. Callers must
|
||||
- * make sure that their AioContext locking is still correct after this.
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
int bdrv_open_file_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3923,8 +3902,6 @@ out:
|
||||
* The reference parameter may be used to specify an existing block device which
|
||||
* should be opened. If specified, neither options nor a filename may be given,
|
||||
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
|
||||
- *
|
||||
- * The caller must always hold the main AioContext lock.
|
||||
*/
|
||||
static BlockDriverState * no_coroutine_fn
|
||||
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||
@@ -4217,7 +4194,6 @@ close_and_fail:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-/* The caller must always hold the main AioContext lock. */
|
||||
BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||
QDict *options, int flags, Error **errp)
|
||||
{
|
||||
@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
|
||||
*
|
||||
* Return 0 on success, otherwise return < 0 and set @errp.
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
* @reopen_state->bs can move to a different AioContext in this function.
|
||||
- * Callers must make sure that their AioContext locking is still correct after
|
||||
- * this.
|
||||
*/
|
||||
static int GRAPH_UNLOCKED
|
||||
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
|
||||
@@ -4801,8 +4774,6 @@ out_rdlock:
|
||||
* It is the responsibility of the caller to then call the abort() or
|
||||
* commit() for any other BDS that have been left in a prepare() state
|
||||
*
|
||||
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||
- *
|
||||
* After calling this function, the transaction @change_child_tran may only be
|
||||
* completed while holding a writer lock for the graph.
|
||||
*/
|
||||
@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
* child.
|
||||
*
|
||||
* This function does not create any image files.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for @bs_top.
|
||||
*/
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
Error **errp)
|
||||
@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs)
|
||||
* after the call (even on failure), so if the caller intends to reuse the
|
||||
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
||||
*
|
||||
- * The caller holds the AioContext lock for @bs. It must make sure that @bs
|
||||
- * stays in the same AioContext, i.e. @options must not refer to nodes in a
|
||||
- * different AioContext.
|
||||
+ * The caller must make sure that @bs stays in the same AioContext, i.e.
|
||||
+ * @options must not refer to nodes in a different AioContext.
|
||||
*/
|
||||
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
|
||||
int flags, Error **errp)
|
||||
@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = {
|
||||
*
|
||||
* Must be called from the main AioContext.
|
||||
*
|
||||
- * The caller must own the AioContext lock for the old AioContext of bs, but it
|
||||
- * must not own the AioContext lock for new_context (unless new_context is the
|
||||
- * same as the current context of bs).
|
||||
- *
|
||||
* @visited will accumulate all visited BdrvChild objects. The caller is
|
||||
* responsible for freeing the list afterwards.
|
||||
*/
|
||||
@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
*
|
||||
* If ignore_child is not NULL, that child (and its subgraph) will not
|
||||
* be touched.
|
||||
- *
|
||||
- * This function still requires the caller to take the bs current
|
||||
- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
|
||||
- * assumes the lock is always held if bs is in another AioContext.
|
||||
- * For the same reason, it temporarily also holds the new AioContext, since
|
||||
- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
|
||||
- * Therefore the new AioContext lock must not be taken by the caller.
|
||||
*/
|
||||
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
BdrvChild *ignore_child, Error **errp)
|
||||
@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||
|
||||
/*
|
||||
* Linear phase: go through all callbacks collected in the transaction.
|
||||
- * Run all callbacks collected in the recursion to switch all nodes
|
||||
- * AioContext lock (transaction commit), or undo all changes done in the
|
||||
+ * Run all callbacks collected in the recursion to switch every node's
|
||||
+ * AioContext (transaction commit), or undo all changes done in the
|
||||
* recursion (transaction abort).
|
||||
*/
|
||||
|
||||
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||
index f412bed274..209eb07528 100644
|
||||
--- a/block/block-backend.c
|
||||
+++ b/block/block-backend.c
|
||||
@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
|
||||
* Both sets of permissions can be changed later using blk_set_perm().
|
||||
*
|
||||
* Return the new BlockBackend on success, null on failure.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
uint64_t shared_perm, Error **errp)
|
||||
@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||
* Just as with bdrv_open(), after having called this function the reference to
|
||||
* @options belongs to the block layer (even on failure).
|
||||
*
|
||||
- * Called without holding an AioContext lock.
|
||||
- *
|
||||
* TODO: Remove @filename and @flags; it should be possible to specify a whole
|
||||
* BDS tree just by specifying the @options QDict (or @reference,
|
||||
* alternatively). At the time of adding this function, this is not possible,
|
||||
@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
|
||||
|
||||
/*
|
||||
* Disassociates the currently associated BlockDriverState from @blk.
|
||||
- *
|
||||
- * The caller must hold the AioContext lock for the BlockBackend.
|
||||
*/
|
||||
void blk_remove_bs(BlockBackend *blk)
|
||||
{
|
||||
@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk)
|
||||
|
||||
/*
|
||||
* Associates a new BlockDriverState with @blk.
|
||||
- *
|
||||
- * Callers must hold the AioContext lock of @bs.
|
||||
*/
|
||||
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
|
||||
{
|
||||
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
|
||||
index 16f48388d3..50c358e8cd 100644
|
||||
--- a/block/export/vhost-user-blk-server.c
|
||||
+++ b/block/export/vhost-user-blk-server.c
|
||||
@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque)
|
||||
vu_config_change_msg(&vexp->vu_server.vu_dev);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.ctx acquired */
|
||||
static void vu_blk_drained_begin(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque)
|
||||
vhost_user_server_detach_aio_context(&vexp->vu_server);
|
||||
}
|
||||
|
||||
-/* Called with vexp->export.blk AioContext acquired */
|
||||
static void vu_blk_drained_end(void *opaque)
|
||||
{
|
||||
VuBlkExport *vexp = opaque;
|
||||
@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque)
|
||||
* Ensures that bdrv_drained_begin() waits until in-flight requests complete
|
||||
* and the server->co_trip coroutine has terminated. It will be restarted in
|
||||
* vhost_user_server_attach_aio_context().
|
||||
- *
|
||||
- * Called with vexp->export.ctx acquired.
|
||||
*/
|
||||
static bool vu_blk_drained_poll(void *opaque)
|
||||
{
|
||||
diff --git a/include/block/block-common.h b/include/block/block-common.h
|
||||
index d7599564db..a846023a09 100644
|
||||
--- a/include/block/block-common.h
|
||||
+++ b/include/block/block-common.h
|
||||
@@ -70,9 +70,6 @@
|
||||
* automatically takes the graph rdlock when calling the wrapped function. In
|
||||
* the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
|
||||
* graph wrlock.
|
||||
- *
|
||||
- * If the first parameter of the function is a BlockDriverState, BdrvChild or
|
||||
- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
|
||||
*/
|
||||
#define no_co_wrapper
|
||||
#define no_co_wrapper_bdrv_rdlock
|
||||
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||
index 8eb39a858b..b49e0537dd 100644
|
||||
--- a/include/block/block-io.h
|
||||
+++ b/include/block/block-io.h
|
||||
@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||
* "I/O or GS" API functions. These functions can run without
|
||||
* the BQL, but only in one specific iothread/main loop.
|
||||
*
|
||||
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||
- * requires the caller to be either in the main thread and hold
|
||||
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||
- * home thread that runs the bs AioContext. Calling them from
|
||||
- * another thread in another AioContext would cause deadlocks.
|
||||
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||
+ * the caller to be either in the main thread or directly in the home thread
|
||||
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||
+ * AioContext would cause deadlocks.
|
||||
*
|
||||
* Therefore, these functions are not proper I/O, because they
|
||||
* can't run in *any* iothreads, but only in a specific one.
|
||||
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||
index 4e31d161c5..151279d481 100644
|
||||
--- a/include/block/block_int-common.h
|
||||
+++ b/include/block/block_int-common.h
|
||||
@@ -1192,8 +1192,6 @@ struct BlockDriverState {
|
||||
/* The error object in use for blocking operations on backing_hd */
|
||||
Error *backing_blocker;
|
||||
|
||||
- /* Protected by AioContext lock */
|
||||
-
|
||||
/*
|
||||
* If we are reading a disk image, give its size in sectors.
|
||||
* Generally read-only; it is written to by load_snapshot and
|
||||
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
|
||||
index b784dcd791..13304242e5 100755
|
||||
--- a/tests/qemu-iotests/202
|
||||
+++ b/tests/qemu-iotests/202
|
||||
@@ -21,7 +21,7 @@
|
||||
# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
|
||||
# single IOThread completes successfully. This particular command triggered a
|
||||
# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
|
||||
-# against regressions.
|
||||
+# against regressions even though the AioContext lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
|
||||
index ab80fd0e44..1ba878522b 100755
|
||||
--- a/tests/qemu-iotests/203
|
||||
+++ b/tests/qemu-iotests/203
|
||||
@@ -21,7 +21,8 @@
|
||||
# Check that QMP 'migrate' with multiple drives on a single IOThread completes
|
||||
# successfully. This particular command triggered a hang in the source QEMU
|
||||
# process due to recursive AioContext locking in bdrv_invalidate_all() and
|
||||
-# BDRV_POLL_WHILE().
|
||||
+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext
|
||||
+# lock no longer exists.
|
||||
|
||||
import iotests
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,105 +0,0 @@
|
||||
From 95b2ffc5f01dc4309c2e747ed883d22cd1d26347 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Sat, 2 Mar 2024 17:00:23 +0100
|
||||
Subject: [PATCH 2/2] chardev/char-socket: Fix TLS io channels sending too much
|
||||
data to the backend
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 227: Fix TLS io channels sending too much data to the backend
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-Commit: [1/1] fce871914e0ce52e16a6edae0e007513f9fec1ae (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-24614
|
||||
|
||||
commit 462945cd22d2bcd233401ed3aa167d83a8e35b05
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Thu Feb 29 11:43:37 2024 +0100
|
||||
|
||||
chardev/char-socket: Fix TLS io channels sending too much data to the backend
|
||||
|
||||
Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers")
|
||||
changed the behavior of the TLS io channels to schedule a second reading
|
||||
attempt if there is still incoming data pending. This caused a regression
|
||||
with backends like the sclpconsole that check in their read function that
|
||||
the sender does not try to write more bytes to it than the device can
|
||||
currently handle.
|
||||
|
||||
The problem can be reproduced like this:
|
||||
|
||||
1) In one terminal, do this:
|
||||
|
||||
mkdir qemu-pki
|
||||
cd qemu-pki
|
||||
openssl genrsa 2048 > ca-key.pem
|
||||
openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem
|
||||
# enter some dummy value for the cert
|
||||
openssl genrsa 2048 > server-key.pem
|
||||
openssl req -new -x509 -nodes -days 365000 -key server-key.pem \
|
||||
-out server-cert.pem
|
||||
# enter some other dummy values for the cert
|
||||
|
||||
gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \
|
||||
--x509certfile server-cert.pem -p 8338
|
||||
|
||||
2) In another terminal, do this:
|
||||
|
||||
wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2
|
||||
|
||||
qemu-system-s390x -nographic -nodefaults \
|
||||
-hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \
|
||||
-object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \
|
||||
-chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \
|
||||
-device sclpconsole,chardev=tls_chardev,id=tls_serial
|
||||
|
||||
QEMU then aborts after a second or two with:
|
||||
|
||||
qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
|
||||
`size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.
|
||||
Aborted (core dumped)
|
||||
|
||||
It looks like the second read does not trigger the chr_can_read() function
|
||||
to be called before the second read, which should normally always be done
|
||||
before sending bytes to a character device to see how much it can handle,
|
||||
so the s->max_size in tcp_chr_read() still contains the old value from the
|
||||
previous read. Let's make sure that we use the up-to-date value by calling
|
||||
tcp_chr_read_poll() again here.
|
||||
|
||||
Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers")
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-24614
|
||||
Reviewed-by: "Daniel P. Berrangé" <berrange@redhat.com>
|
||||
Message-ID: <20240229104339.42574-1-thuth@redhat.com>
|
||||
Reviewed-by: Antoine Damhet <antoine.damhet@blade-group.com>
|
||||
Tested-by: Antoine Damhet <antoine.damhet@blade-group.com>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
chardev/char-socket.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 73947da188..034840593d 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
||||
s->max_size <= 0) {
|
||||
return TRUE;
|
||||
}
|
||||
- len = sizeof(buf);
|
||||
- if (len > s->max_size) {
|
||||
- len = s->max_size;
|
||||
+ len = tcp_chr_read_poll(opaque);
|
||||
+ if (len > sizeof(buf)) {
|
||||
+ len = sizeof(buf);
|
||||
}
|
||||
size = tcp_chr_recv(chr, (void *)buf, len);
|
||||
if (size == 0 || (size == -1 && errno != EAGAIN)) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,78 +0,0 @@
|
||||
From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 18:06:59 +0000
|
||||
Subject: [PATCH 1/3] chardev: lower priority of the HUP GSource in socket
|
||||
chardev
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
|
||||
RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs
|
||||
RH-Jira: RHEL-24614
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu)
|
||||
|
||||
The socket chardev often has 2 GSource object registered against the
|
||||
same FD. One is registered all the time and is just intended to handle
|
||||
POLLHUP events, while the other gets registered & unregistered on the
|
||||
fly as the frontend is ready to receive more data or not.
|
||||
|
||||
It is very common for poll() to signal a POLLHUP event at the same time
|
||||
as there is pending incoming data from the disconnected client. It is
|
||||
therefore essential to process incoming data prior to processing HUP.
|
||||
The problem with having 2 GSource on the same FD is that there is no
|
||||
guaranteed ordering of execution between them, so the chardev code may
|
||||
process HUP first and thus discard data.
|
||||
|
||||
This failure scenario is non-deterministic but can be seen fairly
|
||||
reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and
|
||||
then running 'tests/unit/test-char', which will sometimes fail with
|
||||
missing data.
|
||||
|
||||
Ideally QEMU would only have 1 GSource, but that's a complex code
|
||||
refactoring job. The next best solution is to try to ensure ordering
|
||||
between the 2 GSource objects. This can be achieved by lowering the
|
||||
priority of the HUP GSource, so that it is never dispatched if the
|
||||
main GSource is also ready to dispatch. Counter-intuitively, lowering
|
||||
the priority of a GSource is done by raising its priority number.
|
||||
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6)
|
||||
---
|
||||
chardev/char-socket.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 034840593d..f48d341ebc 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s)
|
||||
|
||||
remove_hup_source(s);
|
||||
s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
|
||||
+ /*
|
||||
+ * poll() is liable to return POLLHUP even when there is
|
||||
+ * still incoming data available to read on the FD. If
|
||||
+ * we have the hup_source at the same priority as the
|
||||
+ * main io_add_watch_poll GSource, then we might end up
|
||||
+ * processing the POLLHUP event first, closing the FD,
|
||||
+ * and as a result silently discard data we should have
|
||||
+ * read.
|
||||
+ *
|
||||
+ * By setting the hup_source to G_PRIORITY_DEFAULT + 1,
|
||||
+ * we ensure that io_add_watch_poll GSource will always
|
||||
+ * be dispatched first, thus guaranteeing we will be
|
||||
+ * able to process all incoming data before closing the
|
||||
+ * FD
|
||||
+ */
|
||||
+ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1);
|
||||
g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
|
||||
chr, NULL);
|
||||
g_source_attach(s->hup_source, chr->gcontext);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,90 @@
|
||||
From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Thu, 29 Feb 2024 01:00:35 -0500
|
||||
Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and
|
||||
kvm_reset() in class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Different confidential VMs in different architectures all have the same
|
||||
needs to do their specific initialization (and maybe resetting) stuffs
|
||||
with KVM. Currently each of them exposes individual *_kvm_init()
|
||||
functions and let machine code or kvm code to call it.
|
||||
|
||||
To facilitate the introduction of confidential guest technology from
|
||||
different x86 vendors, add two virtual functions, kvm_init() and kvm_reset()
|
||||
in ConfidentialGuestSupportClass, and expose two helpers functions for
|
||||
invodking them.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++-
|
||||
1 file changed, 33 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h
|
||||
index ba2dd4b5df..e5b188cffb 100644
|
||||
--- a/include/exec/confidential-guest-support.h
|
||||
+++ b/include/exec/confidential-guest-support.h
|
||||
@@ -23,7 +23,10 @@
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
|
||||
-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT)
|
||||
+OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
|
||||
+ ConfidentialGuestSupportClass,
|
||||
+ CONFIDENTIAL_GUEST_SUPPORT)
|
||||
+
|
||||
|
||||
struct ConfidentialGuestSupport {
|
||||
Object parent;
|
||||
@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport {
|
||||
|
||||
typedef struct ConfidentialGuestSupportClass {
|
||||
ObjectClass parent;
|
||||
+
|
||||
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
+ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
} ConfidentialGuestSupportClass;
|
||||
|
||||
+static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ConfidentialGuestSupportClass *klass;
|
||||
+
|
||||
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
+ if (klass->kvm_init) {
|
||||
+ return klass->kvm_init(cgs, errp);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ConfidentialGuestSupportClass *klass;
|
||||
+
|
||||
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
+ if (klass->kvm_reset) {
|
||||
+ return klass->kvm_reset(cgs, errp);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#endif /* !CONFIG_USER_ONLY */
|
||||
|
||||
#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,412 +0,0 @@
|
||||
From e99c56752a1c4021a93c92b7be78856ebefaa1b3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 18 Mar 2024 14:34:29 -0400
|
||||
Subject: [PATCH 1/2] coroutine: cap per-thread local pool size
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 234: coroutine: cap per-thread local pool size
|
||||
RH-Jira: RHEL-28947
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [1/2] 5971de1c1e238457925bfb9c4bfc932de857b28d (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
The coroutine pool implementation can hit the Linux vm.max_map_count
|
||||
limit, causing QEMU to abort with "failed to allocate memory for stack"
|
||||
or "failed to set up stack guard page" during coroutine creation.
|
||||
|
||||
This happens because per-thread pools can grow to tens of thousands of
|
||||
coroutines. Each coroutine causes 2 virtual memory areas to be created.
|
||||
Eventually vm.max_map_count is reached and memory-related syscalls fail.
|
||||
The per-thread pool sizes are non-uniform and depend on past coroutine
|
||||
usage in each thread, so it's possible for one thread to have a large
|
||||
pool while another thread's pool is empty.
|
||||
|
||||
Switch to a new coroutine pool implementation with a global pool that
|
||||
grows to a maximum number of coroutines and per-thread local pools that
|
||||
are capped at hardcoded small number of coroutines.
|
||||
|
||||
This approach does not leave large numbers of coroutines pooled in a
|
||||
thread that may not use them again. In order to perform well it
|
||||
amortizes the cost of global pool accesses by working in batches of
|
||||
coroutines instead of individual coroutines.
|
||||
|
||||
The global pool is a list. Threads donate batches of coroutines to when
|
||||
they have too many and take batches from when they have too few:
|
||||
|
||||
.-----------------------------------.
|
||||
| Batch 1 | Batch 2 | Batch 3 | ... | global_pool
|
||||
`-----------------------------------'
|
||||
|
||||
Each thread has up to 2 batches of coroutines:
|
||||
|
||||
.-------------------.
|
||||
| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
|
||||
`-------------------'
|
||||
|
||||
The goal of this change is to reduce the excessive number of pooled
|
||||
coroutines that cause QEMU to abort when vm.max_map_count is reached
|
||||
without losing the performance of an adequately sized coroutine pool.
|
||||
|
||||
Here are virtio-blk disk I/O benchmark results:
|
||||
|
||||
RW BLKSIZE IODEPTH OLD NEW CHANGE
|
||||
randread 4k 1 113725 117451 +3.3%
|
||||
randread 4k 8 192968 198510 +2.9%
|
||||
randread 4k 16 207138 209429 +1.1%
|
||||
randread 4k 32 212399 215145 +1.3%
|
||||
randread 4k 64 218319 221277 +1.4%
|
||||
randread 128k 1 17587 17535 -0.3%
|
||||
randread 128k 8 17614 17616 +0.0%
|
||||
randread 128k 16 17608 17609 +0.0%
|
||||
randread 128k 32 17552 17553 +0.0%
|
||||
randread 128k 64 17484 17484 +0.0%
|
||||
|
||||
See files/{fio.sh,test.xml.j2} for the benchmark configuration:
|
||||
https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing
|
||||
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-28947
|
||||
Reported-by: Sanjay Rao <srao@redhat.com>
|
||||
Reported-by: Boaz Ben Shabat <bbenshab@redhat.com>
|
||||
Reported-by: Joe Mario <jmario@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240318183429.1039340-1-stefanha@redhat.com>
|
||||
(cherry picked from commit 86a637e48104ae74d8be53bed6441ce32be33433)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 223 insertions(+), 59 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index 5fd2dbaf8b..2790959eaf 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -18,39 +18,200 @@
|
||||
#include "qemu/atomic.h"
|
||||
#include "qemu/coroutine_int.h"
|
||||
#include "qemu/coroutine-tls.h"
|
||||
+#include "qemu/cutils.h"
|
||||
#include "block/aio.h"
|
||||
|
||||
-/**
|
||||
- * The minimal batch size is always 64, coroutines from the release_pool are
|
||||
- * reused as soon as there are 64 coroutines in it. The maximum pool size starts
|
||||
- * with 64 and is increased on demand so that coroutines are not deleted even if
|
||||
- * they are not immediately reused.
|
||||
- */
|
||||
enum {
|
||||
- POOL_MIN_BATCH_SIZE = 64,
|
||||
- POOL_INITIAL_MAX_SIZE = 64,
|
||||
+ COROUTINE_POOL_BATCH_MAX_SIZE = 128,
|
||||
};
|
||||
|
||||
-/** Free list to speed up creation */
|
||||
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||||
-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
|
||||
-static unsigned int release_pool_size;
|
||||
+/*
|
||||
+ * Coroutine creation and deletion is expensive so a pool of unused coroutines
|
||||
+ * is kept as a cache. When the pool has coroutines available, they are
|
||||
+ * recycled instead of creating new ones from scratch. Coroutines are added to
|
||||
+ * the pool upon termination.
|
||||
+ *
|
||||
+ * The pool is global but each thread maintains a small local pool to avoid
|
||||
+ * global pool contention. Threads fetch and return batches of coroutines from
|
||||
+ * the global pool to maintain their local pool. The local pool holds up to two
|
||||
+ * batches whereas the maximum size of the global pool is controlled by the
|
||||
+ * qemu_coroutine_inc_pool_size() API.
|
||||
+ *
|
||||
+ * .-----------------------------------.
|
||||
+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
|
||||
+ * `-----------------------------------'
|
||||
+ *
|
||||
+ * .-------------------.
|
||||
+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
|
||||
+ * `-------------------'
|
||||
+ */
|
||||
+typedef struct CoroutinePoolBatch {
|
||||
+ /* Batches are kept in a list */
|
||||
+ QSLIST_ENTRY(CoroutinePoolBatch) next;
|
||||
+
|
||||
+ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
|
||||
+ QSLIST_HEAD(, Coroutine) list;
|
||||
+ unsigned int size;
|
||||
+} CoroutinePoolBatch;
|
||||
+
|
||||
+typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
|
||||
+
|
||||
+/* Host operating system limit on number of pooled coroutines */
|
||||
+static unsigned int global_pool_hard_max_size;
|
||||
+
|
||||
+static QemuMutex global_pool_lock; /* protects the following variables */
|
||||
+static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
|
||||
+static unsigned int global_pool_size;
|
||||
+static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
|
||||
+
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
|
||||
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
|
||||
|
||||
-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
|
||||
-QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
|
||||
+static CoroutinePoolBatch *coroutine_pool_batch_new(void)
|
||||
+{
|
||||
+ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
|
||||
+
|
||||
+ QSLIST_INIT(&batch->list);
|
||||
+ batch->size = 0;
|
||||
+ return batch;
|
||||
+}
|
||||
|
||||
-static void coroutine_pool_cleanup(Notifier *n, void *value)
|
||||
+static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
|
||||
{
|
||||
Coroutine *co;
|
||||
Coroutine *tmp;
|
||||
- CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
|
||||
- QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
|
||||
- QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
+ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
|
||||
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
|
||||
qemu_coroutine_delete(co);
|
||||
}
|
||||
+ g_free(batch);
|
||||
+}
|
||||
+
|
||||
+static void local_pool_cleanup(Notifier *n, void *value)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch;
|
||||
+ CoroutinePoolBatch *tmp;
|
||||
+
|
||||
+ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Ensure the atexit notifier is registered */
|
||||
+static void local_pool_cleanup_init_once(void)
|
||||
+{
|
||||
+ Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
|
||||
+ if (!notifier->notify) {
|
||||
+ notifier->notify = local_pool_cleanup;
|
||||
+ qemu_thread_atexit_add(notifier);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Helper to get the next unused coroutine from the local pool */
|
||||
+static Coroutine *coroutine_pool_get_local(void)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
|
||||
+ Coroutine *co;
|
||||
+
|
||||
+ if (unlikely(!batch)) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ co = QSLIST_FIRST(&batch->list);
|
||||
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
|
||||
+ batch->size--;
|
||||
+
|
||||
+ if (batch->size == 0) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+ }
|
||||
+ return co;
|
||||
+}
|
||||
+
|
||||
+/* Get the next batch from the global pool */
|
||||
+static void coroutine_pool_refill_local(void)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch;
|
||||
+
|
||||
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
|
||||
+ batch = QSLIST_FIRST(&global_pool);
|
||||
+
|
||||
+ if (batch) {
|
||||
+ QSLIST_REMOVE_HEAD(&global_pool, next);
|
||||
+ global_pool_size -= batch->size;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (batch) {
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ local_pool_cleanup_init_once();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Add a batch of coroutines to the global pool */
|
||||
+static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
|
||||
+{
|
||||
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
|
||||
+ unsigned int max = MIN(global_pool_max_size,
|
||||
+ global_pool_hard_max_size);
|
||||
+
|
||||
+ if (global_pool_size < max) {
|
||||
+ QSLIST_INSERT_HEAD(&global_pool, batch, next);
|
||||
+
|
||||
+ /* Overshooting the max pool size is allowed */
|
||||
+ global_pool_size += batch->size;
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* The global pool was full, so throw away this batch */
|
||||
+ coroutine_pool_batch_delete(batch);
|
||||
+}
|
||||
+
|
||||
+/* Get the next unused coroutine from the pool or return NULL */
|
||||
+static Coroutine *coroutine_pool_get(void)
|
||||
+{
|
||||
+ Coroutine *co;
|
||||
+
|
||||
+ co = coroutine_pool_get_local();
|
||||
+ if (!co) {
|
||||
+ coroutine_pool_refill_local();
|
||||
+ co = coroutine_pool_get_local();
|
||||
+ }
|
||||
+ return co;
|
||||
+}
|
||||
+
|
||||
+static void coroutine_pool_put(Coroutine *co)
|
||||
+{
|
||||
+ CoroutinePool *local_pool = get_ptr_local_pool();
|
||||
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
|
||||
+
|
||||
+ if (unlikely(!batch)) {
|
||||
+ batch = coroutine_pool_batch_new();
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ local_pool_cleanup_init_once();
|
||||
+ }
|
||||
+
|
||||
+ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
|
||||
+ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
|
||||
+
|
||||
+ /* Is the local pool full? */
|
||||
+ if (next) {
|
||||
+ QSLIST_REMOVE_HEAD(local_pool, next);
|
||||
+ coroutine_pool_put_global(batch);
|
||||
+ }
|
||||
+
|
||||
+ batch = coroutine_pool_batch_new();
|
||||
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
|
||||
+ }
|
||||
+
|
||||
+ QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
|
||||
+ batch->size++;
|
||||
}
|
||||
|
||||
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
@@ -58,31 +219,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||||
Coroutine *co = NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
|
||||
- CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
|
||||
-
|
||||
- co = QSLIST_FIRST(alloc_pool);
|
||||
- if (!co) {
|
||||
- if (release_pool_size > POOL_MIN_BATCH_SIZE) {
|
||||
- /* Slow path; a good place to register the destructor, too. */
|
||||
- Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
|
||||
- if (!notifier->notify) {
|
||||
- notifier->notify = coroutine_pool_cleanup;
|
||||
- qemu_thread_atexit_add(notifier);
|
||||
- }
|
||||
-
|
||||
- /* This is not exact; there could be a little skew between
|
||||
- * release_pool_size and the actual size of release_pool. But
|
||||
- * it is just a heuristic, it does not need to be perfect.
|
||||
- */
|
||||
- set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
|
||||
- QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
|
||||
- co = QSLIST_FIRST(alloc_pool);
|
||||
- }
|
||||
- }
|
||||
- if (co) {
|
||||
- QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
|
||||
- set_alloc_pool_size(get_alloc_pool_size() - 1);
|
||||
- }
|
||||
+ co = coroutine_pool_get();
|
||||
}
|
||||
|
||||
if (!co) {
|
||||
@@ -100,19 +237,10 @@ static void coroutine_delete(Coroutine *co)
|
||||
co->caller = NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
|
||||
- if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
|
||||
- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
|
||||
- qatomic_inc(&release_pool_size);
|
||||
- return;
|
||||
- }
|
||||
- if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
|
||||
- QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
|
||||
- set_alloc_pool_size(get_alloc_pool_size() + 1);
|
||||
- return;
|
||||
- }
|
||||
+ coroutine_pool_put(co);
|
||||
+ } else {
|
||||
+ qemu_coroutine_delete(co);
|
||||
}
|
||||
-
|
||||
- qemu_coroutine_delete(co);
|
||||
}
|
||||
|
||||
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
|
||||
@@ -223,10 +351,46 @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
|
||||
|
||||
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
|
||||
{
|
||||
- qatomic_add(&pool_max_size, additional_pool_size);
|
||||
+ QEMU_LOCK_GUARD(&global_pool_lock);
|
||||
+ global_pool_max_size += additional_pool_size;
|
||||
}
|
||||
|
||||
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
|
||||
{
|
||||
- qatomic_sub(&pool_max_size, removing_pool_size);
|
||||
+ QEMU_LOCK_GUARD(&global_pool_lock);
|
||||
+ global_pool_max_size -= removing_pool_size;
|
||||
+}
|
||||
+
|
||||
+static unsigned int get_global_pool_hard_max_size(void)
|
||||
+{
|
||||
+#ifdef __linux__
|
||||
+ g_autofree char *contents = NULL;
|
||||
+ int max_map_count;
|
||||
+
|
||||
+ /*
|
||||
+ * Linux processes can have up to max_map_count virtual memory areas
|
||||
+ * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
|
||||
+ * must limit the coroutine pool to a safe size to avoid running out of
|
||||
+ * VMAs.
|
||||
+ */
|
||||
+ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
|
||||
+ NULL) &&
|
||||
+ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
|
||||
+ /*
|
||||
+ * This is a conservative upper bound that avoids exceeding
|
||||
+ * max_map_count. Leave half for non-coroutine users like library
|
||||
+ * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so
|
||||
+ * halve the amount again.
|
||||
+ */
|
||||
+ return max_map_count / 4;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ return UINT_MAX;
|
||||
+}
|
||||
+
|
||||
+static void __attribute__((constructor)) qemu_coroutine_init(void)
|
||||
+{
|
||||
+ qemu_mutex_init(&global_pool_lock);
|
||||
+ global_pool_hard_max_size = get_global_pool_hard_max_size();
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 0aa65dc3acba481f7064df936ab49e3bceb1d5bd Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Wed, 20 Mar 2024 14:12:32 -0400
|
||||
Subject: [PATCH 2/2] coroutine: reserve 5,000 mappings
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 234: coroutine: cap per-thread local pool size
|
||||
RH-Jira: RHEL-28947
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [2/2] 78560c2b947471111cc16c313d6f38db42860a1c (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
Daniel P. Berrangé <berrange@redhat.com> pointed out that the coroutine
|
||||
pool size heuristic is very conservative. Instead of halving
|
||||
max_map_count, he suggested reserving 5,000 mappings for non-coroutine
|
||||
users based on observations of guests he has access to.
|
||||
|
||||
Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size")
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Message-id: 20240320181232.1464819-1-stefanha@redhat.com
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 9352f80cd926fe2dde7c89b93ee33bb0356ff40e)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine.c | 15 ++++++++++-----
|
||||
1 file changed, 10 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||||
index 2790959eaf..eb4eebefdf 100644
|
||||
--- a/util/qemu-coroutine.c
|
||||
+++ b/util/qemu-coroutine.c
|
||||
@@ -377,12 +377,17 @@ static unsigned int get_global_pool_hard_max_size(void)
|
||||
NULL) &&
|
||||
qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
|
||||
/*
|
||||
- * This is a conservative upper bound that avoids exceeding
|
||||
- * max_map_count. Leave half for non-coroutine users like library
|
||||
- * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so
|
||||
- * halve the amount again.
|
||||
+ * This is an upper bound that avoids exceeding max_map_count. Leave a
|
||||
+ * fixed amount for non-coroutine users like library dependencies,
|
||||
+ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
|
||||
+ * remaining amount.
|
||||
*/
|
||||
- return max_map_count / 4;
|
||||
+ if (max_map_count > 5000) {
|
||||
+ return (max_map_count - 5000) / 2;
|
||||
+ } else {
|
||||
+ /* Disable the global pool but threads still have local pools */
|
||||
+ return 0;
|
||||
+ }
|
||||
}
|
||||
#endif
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,228 @@
|
||||
From 117486e0820f135f191e19f8ebb8838a98b121c6 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 27 May 2024 11:58:51 -0400
|
||||
Subject: [PATCH 5/5] crypto/block: drop qcrypto_block_open() n_threads
|
||||
argument
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 251: block/crypto: create ciphers on demand
|
||||
RH-Jira: RHEL-36159
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/2] 68290935b174b1f2b76aa857a926da9011e54abe (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
The n_threads argument is no longer used since the previous commit.
|
||||
Remove it.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240527155851.892885-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 3ab0f063e58ed9224237d69c4211ca83335164c4)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
block/crypto.c | 1 -
|
||||
block/qcow.c | 2 +-
|
||||
block/qcow2.c | 5 ++---
|
||||
crypto/block-luks.c | 1 -
|
||||
crypto/block-qcow.c | 6 ++----
|
||||
crypto/block.c | 3 +--
|
||||
crypto/blockpriv.h | 1 -
|
||||
include/crypto/block.h | 2 --
|
||||
tests/unit/test-crypto-block.c | 4 ----
|
||||
9 files changed, 6 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index 21eed909c1..4eed3ffa6a 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -363,7 +363,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
|
||||
block_crypto_read_func,
|
||||
bs,
|
||||
cflags,
|
||||
- 1,
|
||||
errp);
|
||||
|
||||
if (!crypto->block) {
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index ca8e1d5ec8..c2f89db055 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -211,7 +211,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
||||
}
|
||||
s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
|
||||
- NULL, NULL, cflags, 1, errp);
|
||||
+ NULL, NULL, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 0e8b2f7518..0ebd455dc8 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -321,7 +321,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
|
||||
}
|
||||
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
||||
qcow2_crypto_hdr_read_func,
|
||||
- bs, cflags, QCOW2_MAX_THREADS, errp);
|
||||
+ bs, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1707,8 +1707,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
||||
}
|
||||
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
||||
- NULL, NULL, cflags,
|
||||
- QCOW2_MAX_THREADS, errp);
|
||||
+ NULL, NULL, cflags, errp);
|
||||
if (!s->crypto) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
|
||||
index 3357852c0a..5b777c15d3 100644
|
||||
--- a/crypto/block-luks.c
|
||||
+++ b/crypto/block-luks.c
|
||||
@@ -1189,7 +1189,6 @@ qcrypto_block_luks_open(QCryptoBlock *block,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
QCryptoBlockLUKS *luks = NULL;
|
||||
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
|
||||
index 02305058e3..42e9556e42 100644
|
||||
--- a/crypto/block-qcow.c
|
||||
+++ b/crypto/block-qcow.c
|
||||
@@ -44,7 +44,6 @@ qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED,
|
||||
static int
|
||||
qcrypto_block_qcow_init(QCryptoBlock *block,
|
||||
const char *keysecret,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
char *password;
|
||||
@@ -100,7 +99,6 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
|
||||
QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
|
||||
void *opaque G_GNUC_UNUSED,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) {
|
||||
@@ -115,7 +113,7 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
|
||||
return -1;
|
||||
}
|
||||
return qcrypto_block_qcow_init(block, options->u.qcow.key_secret,
|
||||
- n_threads, errp);
|
||||
+ errp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,7 +133,7 @@ qcrypto_block_qcow_create(QCryptoBlock *block,
|
||||
return -1;
|
||||
}
|
||||
/* QCow2 has no special header, since everything is hardwired */
|
||||
- return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp);
|
||||
+ return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp);
|
||||
}
|
||||
|
||||
|
||||
diff --git a/crypto/block.c b/crypto/block.c
|
||||
index ba6d1cebc7..3bcc4270c3 100644
|
||||
--- a/crypto/block.c
|
||||
+++ b/crypto/block.c
|
||||
@@ -53,7 +53,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp)
|
||||
{
|
||||
QCryptoBlock *block = g_new0(QCryptoBlock, 1);
|
||||
@@ -73,7 +72,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
block->driver = qcrypto_block_drivers[options->format];
|
||||
|
||||
if (block->driver->open(block, options, optprefix,
|
||||
- readfunc, opaque, flags, n_threads, errp) < 0)
|
||||
+ readfunc, opaque, flags, errp) < 0)
|
||||
{
|
||||
g_free(block);
|
||||
return NULL;
|
||||
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
|
||||
index 4bf6043d5d..b8f77cb5eb 100644
|
||||
--- a/crypto/blockpriv.h
|
||||
+++ b/crypto/blockpriv.h
|
||||
@@ -59,7 +59,6 @@ struct QCryptoBlockDriver {
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp);
|
||||
|
||||
int (*create)(QCryptoBlock *block,
|
||||
diff --git a/include/crypto/block.h b/include/crypto/block.h
|
||||
index 92e823c9f2..5b5d039800 100644
|
||||
--- a/include/crypto/block.h
|
||||
+++ b/include/crypto/block.h
|
||||
@@ -76,7 +76,6 @@ typedef enum {
|
||||
* @readfunc: callback for reading data from the volume
|
||||
* @opaque: data to pass to @readfunc
|
||||
* @flags: bitmask of QCryptoBlockOpenFlags values
|
||||
- * @n_threads: allow concurrent I/O from up to @n_threads threads
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
* Create a new block encryption object for an existing
|
||||
@@ -113,7 +112,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
|
||||
QCryptoBlockReadFunc readfunc,
|
||||
void *opaque,
|
||||
unsigned int flags,
|
||||
- size_t n_threads,
|
||||
Error **errp);
|
||||
|
||||
typedef enum {
|
||||
diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c
|
||||
index 6cfc817a92..42cfab6067 100644
|
||||
--- a/tests/unit/test-crypto-block.c
|
||||
+++ b/tests/unit/test-crypto-block.c
|
||||
@@ -303,7 +303,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
0,
|
||||
- 1,
|
||||
NULL);
|
||||
g_assert(blk == NULL);
|
||||
|
||||
@@ -312,7 +311,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
QCRYPTO_BLOCK_OPEN_NO_IO,
|
||||
- 1,
|
||||
&error_abort);
|
||||
|
||||
g_assert(qcrypto_block_get_cipher(blk) == NULL);
|
||||
@@ -327,7 +325,6 @@ static void test_block(gconstpointer opaque)
|
||||
test_block_read_func,
|
||||
&header,
|
||||
0,
|
||||
- 1,
|
||||
&error_abort);
|
||||
g_assert(blk);
|
||||
|
||||
@@ -384,7 +381,6 @@ test_luks_bad_header(gconstpointer data)
|
||||
test_block_read_func,
|
||||
&buf,
|
||||
0,
|
||||
- 1,
|
||||
&err);
|
||||
g_assert(!blk);
|
||||
g_assert(err);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,75 +0,0 @@
|
||||
From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Mon, 4 Dec 2023 11:42:59 -0500
|
||||
Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb()
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
|
||||
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
|
||||
to avoid a race with scsi_device_purge_requests() running in the main
|
||||
loop thread.
|
||||
|
||||
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
|
||||
while I/O is running in the IOThread AioContext. Therefore it is no
|
||||
longer necessary to take this lock to protect DMAAIOCB fields. The
|
||||
->cb() function also does not require the lock because blk_aio_*() and
|
||||
friends do not need the AioContext lock.
|
||||
|
||||
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
|
||||
rely on it taking the AioContext lock, so this change is safe.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-ID: <20231204164259.1515217-5-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
system/dma-helpers.c | 7 ++-----
|
||||
1 file changed, 2 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
|
||||
index 36211acc7e..528117f256 100644
|
||||
--- a/system/dma-helpers.c
|
||||
+++ b/system/dma-helpers.c
|
||||
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
- aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
trace_dma_map_wait(dbs);
|
||||
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
assert(dbs->acb);
|
||||
-out:
|
||||
- aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,228 +0,0 @@
|
||||
From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001
|
||||
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:26 +0800
|
||||
Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 1 +
|
||||
docs/devel/index-internals.rst | 1 +
|
||||
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
|
||||
3 files changed, 168 insertions(+)
|
||||
create mode 100644 docs/devel/vfio-iommufd.rst
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index ca70bb4e64..0ddb20a35f 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
|
||||
F: include/sysemu/iommufd.h
|
||||
F: include/qemu/chardev_open.h
|
||||
F: util/chardev_open.c
|
||||
+F: docs/devel/vfio-iommufd.rst
|
||||
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
|
||||
index 6f81df92bc..3def4a138b 100644
|
||||
--- a/docs/devel/index-internals.rst
|
||||
+++ b/docs/devel/index-internals.rst
|
||||
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
|
||||
s390-dasd-ipl
|
||||
tracing
|
||||
vfio-migration
|
||||
+ vfio-iommufd
|
||||
writing-monitor-commands
|
||||
virtio-backends
|
||||
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
|
||||
new file mode 100644
|
||||
index 0000000000..3d1c11f175
|
||||
--- /dev/null
|
||||
+++ b/docs/devel/vfio-iommufd.rst
|
||||
@@ -0,0 +1,166 @@
|
||||
+===============================
|
||||
+IOMMUFD BACKEND usage with VFIO
|
||||
+===============================
|
||||
+
|
||||
+(Same meaning for backend/container/BE)
|
||||
+
|
||||
+With the introduction of iommufd, the Linux kernel provides a generic
|
||||
+interface for user space drivers to propagate their DMA mappings to kernel
|
||||
+for assigned devices. While the legacy kernel interface is group-centric,
|
||||
+the new iommufd interface is device-centric, relying on device fd and iommufd.
|
||||
+
|
||||
+To support both interfaces in the QEMU VFIO device, introduce a base container
|
||||
+to abstract the common part of VFIO legacy and iommufd container. So that the
|
||||
+generic VFIO code can use either container.
|
||||
+
|
||||
+The base container implements generic functions such as memory_listener and
|
||||
+address space management whereas the derived container implements callbacks
|
||||
+specific to either legacy or iommufd. Each container has its own way to setup
|
||||
+secure context and dma management interface. The below diagram shows how it
|
||||
+looks like with both containers.
|
||||
+
|
||||
+::
|
||||
+
|
||||
+ VFIO AddressSpace/Memory
|
||||
+ +-------+ +----------+ +-----+ +-----+
|
||||
+ | pci | | platform | | ap | | ccw |
|
||||
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
|
||||
+ | | | | | AddressSpace |
|
||||
+ | | | | +------------+---------+
|
||||
+ +---V-----------V-----------V--------V----+ /
|
||||
+ | VFIOAddressSpace | <------------+
|
||||
+ | | | MemoryListener
|
||||
+ | VFIOContainerBase list |
|
||||
+ +-------+----------------------------+----+
|
||||
+ | |
|
||||
+ | |
|
||||
+ +-------V------+ +--------V----------+
|
||||
+ | iommufd | | vfio legacy |
|
||||
+ | container | | container |
|
||||
+ +-------+------+ +--------+----------+
|
||||
+ | |
|
||||
+ | /dev/iommu | /dev/vfio/vfio
|
||||
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
|
||||
+ Userspace | |
|
||||
+ ============+============================+===========================
|
||||
+ Kernel | device fd |
|
||||
+ +---------------+ | group/container fd
|
||||
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
|
||||
+ | ATTACH_IOAS) | | device fd
|
||||
+ | | |
|
||||
+ | +-------V------------V-----------------+
|
||||
+ iommufd | | vfio |
|
||||
+ (map/unmap | +---------+--------------------+-------+
|
||||
+ ioas_copy) | | | map/unmap
|
||||
+ | | |
|
||||
+ +------V------+ +-----V------+ +------V--------+
|
||||
+ | iommfd core | | device | | vfio iommu |
|
||||
+ +-------------+ +------------+ +---------------+
|
||||
+
|
||||
+* Secure Context setup
|
||||
+
|
||||
+ - iommufd BE: uses device fd and iommufd to setup secure context
|
||||
+ (bind_iommufd, attach_ioas)
|
||||
+ - vfio legacy BE: uses group fd and container fd to setup secure context
|
||||
+ (set_container, set_iommu)
|
||||
+
|
||||
+* Device access
|
||||
+
|
||||
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
|
||||
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
|
||||
+
|
||||
+* DMA Mapping flow
|
||||
+
|
||||
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
|
||||
+ 2. VFIO populates DMA map/unmap via the container BEs
|
||||
+ * iommufd BE: uses iommufd
|
||||
+ * vfio legacy BE: uses container fd
|
||||
+
|
||||
+Example configuration
|
||||
+=====================
|
||||
+
|
||||
+Step 1: configure the host device
|
||||
+---------------------------------
|
||||
+
|
||||
+It's exactly same as the VFIO device with legacy VFIO container.
|
||||
+
|
||||
+Step 2: configure QEMU
|
||||
+----------------------
|
||||
+
|
||||
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
|
||||
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
|
||||
+
|
||||
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
|
||||
+be linked with an iommufd object. It gets a new optional property
|
||||
+named iommufd which allows to pass an iommufd object. Take ``vfio-pci``
|
||||
+device for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||
+
|
||||
+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
|
||||
+management layer. In such a case the fd is passed, the fd supports a
|
||||
+string naming the fd or a number, for example:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0,fd=22
|
||||
+ -device vfio-pci,iommufd=iommufd0,fd=23
|
||||
+
|
||||
+If the ``fd`` property is not passed, the fd is opened by QEMU.
|
||||
+
|
||||
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
|
||||
+is not used and the user gets the behavior based on the legacy VFIO
|
||||
+container:
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -device vfio-pci,host=0000:02:00.0
|
||||
+
|
||||
+Supported platform
|
||||
+==================
|
||||
+
|
||||
+Supports x86, ARM and s390x currently.
|
||||
+
|
||||
+Caveats
|
||||
+=======
|
||||
+
|
||||
+Dirty page sync
|
||||
+---------------
|
||||
+
|
||||
+Dirty page sync with iommufd backend is unsupported yet, live migration is
|
||||
+disabled by default. But it can be force enabled like below, low efficient
|
||||
+though.
|
||||
+
|
||||
+.. code-block:: bash
|
||||
+
|
||||
+ -object iommufd,id=iommufd0
|
||||
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
|
||||
+
|
||||
+P2P DMA
|
||||
+-------
|
||||
+
|
||||
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
|
||||
+BAR region yet. Below warning shows for assigned PCI device, it's not a bug.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
|
||||
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
|
||||
+
|
||||
+FD passing with mdev
|
||||
+--------------------
|
||||
+
|
||||
+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
|
||||
+If FD passing is used, there is no way to know that and the mdev is treated
|
||||
+like a real PCI device. There is an error as below if user wants to enable
|
||||
+RAM discarding for mdev.
|
||||
+
|
||||
+.. code-block:: none
|
||||
+
|
||||
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
|
||||
+
|
||||
+``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend
|
||||
+devices are always mdev and RAM discarding is force enabled.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,98 +0,0 @@
|
||||
From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 5 Dec 2023 13:20:08 -0500
|
||||
Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 214: Remove AioContext lock
|
||||
RH-Jira: RHEL-15965
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm)
|
||||
|
||||
Encourage the use of locking primitives and stop mentioning the
|
||||
AioContext lock since it is being removed.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20231205182011.1976568-12-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 47 +++++++++++--------------------
|
||||
1 file changed, 16 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index a3e949f6b3..4865196bde 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in.
|
||||
|
||||
How to synchronize with an IOThread
|
||||
-----------------------------------
|
||||
-AioContext is not thread-safe so some rules must be followed when using file
|
||||
-descriptors, event notifiers, timers, or BHs across threads:
|
||||
+Variables that can be accessed by multiple threads require some form of
|
||||
+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc.
|
||||
|
||||
-1. AioContext functions can always be called safely. They handle their
|
||||
-own locking internally.
|
||||
-
|
||||
-2. Other threads wishing to access the AioContext must use
|
||||
-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
|
||||
-context is acquired no other thread can access it or run event loop iterations
|
||||
-in this AioContext.
|
||||
-
|
||||
-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
|
||||
-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
|
||||
-used in the block layer and can lead to hangs.
|
||||
-
|
||||
-There is currently no lock ordering rule if a thread needs to acquire multiple
|
||||
-AioContexts simultaneously. Therefore, it is only safe for code holding the
|
||||
-QEMU global mutex to acquire other AioContexts.
|
||||
+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(),
|
||||
+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger
|
||||
+activity in an IOThread.
|
||||
|
||||
Side note: the best way to schedule a function call across threads is to call
|
||||
-aio_bh_schedule_oneshot(). No acquire/release or locking is needed.
|
||||
+aio_bh_schedule_oneshot().
|
||||
+
|
||||
+The main loop thread can wait synchronously for a condition using
|
||||
+AIO_WAIT_WHILE().
|
||||
|
||||
AioContext and the block layer
|
||||
------------------------------
|
||||
@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using
|
||||
old APIs that implicitly use the main loop. See the "How to program for
|
||||
IOThreads" above for information on how to do that.
|
||||
|
||||
-If main loop code such as a QMP function wishes to access a BlockDriverState
|
||||
-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
|
||||
-that callbacks in the IOThread do not run in parallel.
|
||||
-
|
||||
Code running in the monitor typically needs to ensure that past
|
||||
requests from the guest are completed. When a block device is running
|
||||
in an IOThread, the IOThread can also process requests from the guest
|
||||
(via ioeventfd). To achieve both objects, wrap the code between
|
||||
bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
|
||||
-section". The functions must be called between aio_context_acquire()
|
||||
-and aio_context_release(). You can freely release and re-acquire the
|
||||
-AioContext within a drained section.
|
||||
-
|
||||
-Long-running jobs (usually in the form of coroutines) are best scheduled in
|
||||
-the BlockDriverState's AioContext to avoid the need to acquire/release around
|
||||
-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
|
||||
-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
|
||||
-can be used to get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
+section".
|
||||
+
|
||||
+Long-running jobs (usually in the form of coroutines) are often scheduled in
|
||||
+the BlockDriverState's AioContext. The functions
|
||||
+bdrv_add/remove_aio_context_notifier, or alternatively
|
||||
+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to
|
||||
+get a notification whenever bdrv_try_change_aio_context() moves a
|
||||
BlockDriverState to a different AioContext.
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,94 +0,0 @@
|
||||
From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001
|
||||
From: David Hildenbrand <david@redhat.com>
|
||||
Date: Wed, 17 Jan 2024 14:55:53 +0100
|
||||
Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB
|
||||
alignment
|
||||
|
||||
RH-Author: David Hildenbrand <david@redhat.com>
|
||||
RH-MergeRequest: 221: memory-device: reintroduce memory region size check
|
||||
RH-Jira: RHEL-20341
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825
|
||||
|
||||
https://issues.redhat.com/browse/RHEL-20341
|
||||
|
||||
Let's implement the get_min_alignment() callback for memory devices, and
|
||||
copy for the device memory region the alignment of the host memory
|
||||
region. This mimics what virtio-mem does, and allows for re-introducing
|
||||
proper alignment checks for the memory region size (where we don't care
|
||||
about additional device requirements) in memory device core.
|
||||
|
||||
Message-ID: <20240117135554.787344-2-david@redhat.com>
|
||||
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414)
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
---
|
||||
hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++----------------
|
||||
1 file changed, 21 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
|
||||
index 66f297c1d7..0829c495b0 100644
|
||||
--- a/hw/hyperv/hv-balloon.c
|
||||
+++ b/hw/hyperv/hv-balloon.c
|
||||
@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
|
||||
balloon->mr = g_new0(MemoryRegion, 1);
|
||||
memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
|
||||
memory_region_size(hostmem_mr));
|
||||
-
|
||||
- /*
|
||||
- * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
- * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
- * specify a larger maxmem eventually.
|
||||
- *
|
||||
- * The memory device core will warn the user in case maxmem might have to be
|
||||
- * increased and will fail plugging the device if there is not sufficient
|
||||
- * space after alignment.
|
||||
- *
|
||||
- * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
- * But this feels better, although the warning might be annoying. Maybe
|
||||
- * we can optimize that in the future (e.g., with such a device on the
|
||||
- * cmdline place/size the device memory region differently.
|
||||
- */
|
||||
- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
|
||||
+ balloon->mr->align = memory_region_get_alignment(hostmem_mr);
|
||||
}
|
||||
|
||||
static void hv_balloon_free_mr(HvBalloon *balloon)
|
||||
@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
|
||||
return balloon->mr;
|
||||
}
|
||||
|
||||
+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
|
||||
+{
|
||||
+ /*
|
||||
+ * The VM can indicate an alignment up to 32 GiB. Memory device core can
|
||||
+ * usually only handle/guarantee 1 GiB alignment. The user will have to
|
||||
+ * specify a larger maxmem eventually.
|
||||
+ *
|
||||
+ * The memory device core will warn the user in case maxmem might have to be
|
||||
+ * increased and will fail plugging the device if there is not sufficient
|
||||
+ * space after alignment.
|
||||
+ *
|
||||
+ * TODO: we could do the alignment ourselves in a slightly bigger region.
|
||||
+ * But this feels better, although the warning might be annoying. Maybe
|
||||
+ * we can optimize that in the future (e.g., with such a device on the
|
||||
+ * cmdline place/size the device memory region differently.
|
||||
+ */
|
||||
+ return 32 * GiB;
|
||||
+}
|
||||
+
|
||||
static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
|
||||
MemoryDeviceInfo *info)
|
||||
{
|
||||
@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data)
|
||||
mdc->get_memory_region = hv_balloon_md_get_memory_region;
|
||||
mdc->decide_memslots = hv_balloon_decide_memslots;
|
||||
mdc->get_memslots = hv_balloon_get_memslots;
|
||||
+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
|
||||
mdc->fill_device_info = hv_balloon_md_fill_device_info;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,42 +0,0 @@
|
||||
From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:18 +0800
|
||||
Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
|
||||
index 3ada335a24..660f49db49 100644
|
||||
--- a/hw/arm/Kconfig
|
||||
+++ b/hw/arm/Kconfig
|
||||
@@ -8,6 +8,7 @@ config ARM_VIRT
|
||||
imply TPM_TIS_SYSBUS
|
||||
imply TPM_TIS_I2C
|
||||
imply NVDIMM
|
||||
+ imply IOMMUFD
|
||||
select ARM_GIC
|
||||
select ACPI
|
||||
select ARM_SMMUV3
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,88 +0,0 @@
|
||||
From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001
|
||||
From: Gavin Shan <gshan@redhat.com>
|
||||
Date: Tue, 9 Jan 2024 11:36:42 +1000
|
||||
Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory
|
||||
regions
|
||||
|
||||
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||
RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions
|
||||
RH-Jira: RHEL-19738
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4
|
||||
|
||||
Upstream: RHEL-only
|
||||
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352
|
||||
|
||||
There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI
|
||||
ECAM and PCI MMIO. Each of them has a property introduced by upstream
|
||||
commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory
|
||||
regions") so that the corresponding high memory region can be disabled.
|
||||
|
||||
It's notable that another property ("compact-highmem") introduced by
|
||||
upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property")
|
||||
so that the compact high memory region layout during assignment can be
|
||||
disabled, compatible to the old machine types. However, we don't have
|
||||
the compatible issue since the compact high memory region layout is
|
||||
always kept as disabled until RHEL9.2.0 machine type and onwards.
|
||||
|
||||
Expose those 3 properties: "highmem-redists", "highmem-ecam" and
|
||||
"highmem-mmio". The property "compact-highmem" is kept as hidden.
|
||||
|
||||
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 5cab00b4cd..60f117f0d2 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_compact = value;
|
||||
}
|
||||
+#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||
|
||||
vms->highmem_mmio = value;
|
||||
}
|
||||
-#endif /* disabled for RHEL */
|
||||
|
||||
static bool virt_get_its(Object *obj, Error **errp)
|
||||
{
|
||||
@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
"Set on/off to enable/disable using "
|
||||
"physical address space above 32 bits");
|
||||
|
||||
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||
+ virt_get_highmem_redists,
|
||||
+ virt_set_highmem_redists);
|
||||
+ object_class_property_set_description(oc, "highmem-redists",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for GICv3 or GICv4 "
|
||||
+ "redistributor");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||
+ virt_get_highmem_ecam,
|
||||
+ virt_set_highmem_ecam);
|
||||
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI ECAM");
|
||||
+
|
||||
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||
+ virt_get_highmem_mmio,
|
||||
+ virt_set_highmem_mmio);
|
||||
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||
+ "Set on/off to enable/disable high "
|
||||
+ "memory region for PCI MMIO");
|
||||
+
|
||||
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||
virt_set_gic_version);
|
||||
object_class_property_set_description(oc, "gic-version",
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,120 @@
|
||||
From 41c4083269ec772b406c6c57b496ca2011f928c7 Mon Sep 17 00:00:00 2001
|
||||
From: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Date: Tue, 9 Jul 2024 23:08:59 -0400
|
||||
Subject: [PATCH 2/2] hw/arm/virt: Avoid unexpected warning from Linux guest on
|
||||
host with Fujitsu CPUs
|
||||
|
||||
RH-Author: zhenyzha <None>
|
||||
RH-MergeRequest: 256: hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs
|
||||
RH-Jira: RHEL-39936
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [1/1] fdf156fd05b219a06e2e2ca409fff0f728c1e2cf (zhenyzha/qemu-kvm)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-39936
|
||||
|
||||
Multiple warning messages and corresponding backtraces are observed when Linux
|
||||
guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
|
||||
|
||||
[ 0.032443] ------------[ cut here ]------------
|
||||
[ 0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than
|
||||
CTR_EL0.CWG (128 < 256)
|
||||
[ 0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54
|
||||
arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032470] Modules linked in:
|
||||
[ 0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64
|
||||
[ 0.032481] Hardware name: linux,dummy-virt (DT)
|
||||
[ 0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
|
||||
[ 0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032501] sp : ffff80008003b860
|
||||
[ 0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
|
||||
[ 0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
|
||||
[ 0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
|
||||
[ 0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
|
||||
[ 0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
|
||||
[ 0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
|
||||
[ 0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
|
||||
[ 0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
|
||||
[ 0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
|
||||
[ 0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
|
||||
[ 0.032564] Call trace:
|
||||
[ 0.032566] arch_setup_dma_ops+0xbc/0xcc
|
||||
[ 0.032572] of_dma_configure_id+0x138/0x300
|
||||
[ 0.032591] amba_dma_configure+0x34/0xc0
|
||||
[ 0.032600] really_probe+0x78/0x3dc
|
||||
[ 0.032614] __driver_probe_device+0x108/0x160
|
||||
[ 0.032619] driver_probe_device+0x44/0x114
|
||||
[ 0.032624] __device_attach_driver+0xb8/0x14c
|
||||
[ 0.032629] bus_for_each_drv+0x88/0xe4
|
||||
[ 0.032634] __device_attach+0xb0/0x1e0
|
||||
[ 0.032638] device_initial_probe+0x18/0x20
|
||||
[ 0.032643] bus_probe_device+0xa8/0xb0
|
||||
[ 0.032648] device_add+0x4b4/0x6c0
|
||||
[ 0.032652] amba_device_try_add.part.0+0x48/0x360
|
||||
[ 0.032657] amba_device_add+0x104/0x144
|
||||
[ 0.032662] of_amba_device_create.isra.0+0x100/0x1c4
|
||||
[ 0.032666] of_platform_bus_create+0x294/0x35c
|
||||
[ 0.032669] of_platform_populate+0x5c/0x150
|
||||
[ 0.032672] of_platform_default_populate_init+0xd0/0xec
|
||||
[ 0.032697] do_one_initcall+0x4c/0x2e0
|
||||
[ 0.032701] do_initcalls+0x100/0x13c
|
||||
[ 0.032707] kernel_init_freeable+0x1c8/0x21c
|
||||
[ 0.032712] kernel_init+0x28/0x140
|
||||
[ 0.032731] ret_from_fork+0x10/0x20
|
||||
[ 0.032735] ---[ end trace 0000000000000000 ]---
|
||||
|
||||
In Linux, a check is applied to every device which is exposed through
|
||||
device-tree node. The warning message is raised when the device isn't
|
||||
DMA coherent and the cache line size is larger than ARCH_DMA_MINALIGN
|
||||
(128 bytes). The cache line is sorted from CTR_EL0[CWG], which corresponds
|
||||
to 256 bytes on the guest CPUs. The DMA coherent capability is claimed
|
||||
through 'dma-coherent' in their device-tree nodes or parent nodes.
|
||||
This happens even when the device doesn't implement or use DMA at all,
|
||||
for legacy reasons.
|
||||
|
||||
Fix the issue by adding 'dma-coherent' property to the device-tree root
|
||||
node, meaning all devices are capable of DMA coherent by default.
|
||||
This both suppresses the spurious kernel warnings and also guards
|
||||
against possible future QEMU bugs where we add a DMA-capable device
|
||||
and forget to mark it as dma-coherent.
|
||||
|
||||
Signed-off-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
Reviewed-by: Gavin Shan <gshan@redhat.com>
|
||||
Reviewed-by: Donald Dutile <ddutile@redhat.com
|
||||
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Message-id: 20240612020506.307793-1-zhenyzha@redhat.com
|
||||
[PMM: tweaked commit message]
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit dda533087ad5559674ff486e7031c88dc01e0abd)
|
||||
Signed-off-by: Zhenyu Zhang <zhenyzha@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 3f0496cdb9..6ece67f11d 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -330,6 +330,17 @@ static void create_fdt(VirtMachineState *vms)
|
||||
qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2);
|
||||
qemu_fdt_setprop_string(fdt, "/", "model", "linux,dummy-virt");
|
||||
|
||||
+ /*
|
||||
+ * For QEMU, all DMA is coherent. Advertising this in the root node
|
||||
+ * has two benefits:
|
||||
+ *
|
||||
+ * - It avoids potential bugs where we forget to mark a DMA
|
||||
+ * capable device as being dma-coherent
|
||||
+ * - It avoids spurious warnings from the Linux kernel about
|
||||
+ * devices which can't do DMA at all
|
||||
+ */
|
||||
+ qemu_fdt_setprop(fdt, "/", "dma-coherent", NULL, 0);
|
||||
+
|
||||
/* /chosen must exist for load_dtb to fill in necessary properties later */
|
||||
qemu_fdt_add_subnode(fdt, "/chosen");
|
||||
if (vms->dtb_randomness) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,132 +0,0 @@
|
||||
From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Wed, 3 Jan 2024 05:57:38 -0500
|
||||
Subject: [PATCH] hw/arm/virt: Fix compats
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 209: hw/arm/virt: Fix compats
|
||||
RH-Jira: RHEL-17168
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm)
|
||||
|
||||
arm_rhel_compat is not added for virt-rhel9.4.0 machine causing
|
||||
the efi-virtio.rom to be looked for when instantiating a virtio-net-pci
|
||||
device and it won't be found since not shipped on ARM. This is a
|
||||
regression compared to 9.2.
|
||||
|
||||
Actually we do not need any rom file for any virtio-net-pci variant
|
||||
because edk2 already brings the functionality. So for 9.4 onwards, we
|
||||
want to set romfiles to "" for all of them.
|
||||
|
||||
However at the moment we apply arm_rhel_compat from the latest
|
||||
rhel*_virt_options(). This is not aligned with the generic compat
|
||||
usage which sets compats for a given machine type to accomodate for
|
||||
changes that occured after its advent. Here we are somehow abusing
|
||||
the compat infra to set general driver options that should apply for
|
||||
all machines. On top of that this is really error prone and we have
|
||||
forgotten to add arm_rhel_compat several times in the past.
|
||||
|
||||
So let's introduce set_arm_rhel_compat() being called before any
|
||||
*virt_options in the non abstract machine class. That way the setting
|
||||
will apply to any machine type without any need to add it in any
|
||||
future machine types.
|
||||
|
||||
For < 9.4 machines we don't really care keeping non void romfiles
|
||||
for transitional and non transitional devices because anyway this was
|
||||
not working. So let's keep things simple and apply the new defaults for
|
||||
all RHEL9 machine types.
|
||||
|
||||
Finally, to follow the generic pattern we should set hw_compat_rhel_9_0
|
||||
in 9.0 machine as it is done on x86 or ccw. This has no consequence on
|
||||
aarch64 because it only contains x86 stuff but that helps understanding
|
||||
the consistency.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 43 +++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 29 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 0b17c94ad7..5cab00b4cd 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -111,11 +111,39 @@
|
||||
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
+/*
|
||||
+ * This variable is for changes to properties that are RHEL specific,
|
||||
+ * different to the current upstream and to be applied to the latest
|
||||
+ * machine type. They may be overriden by older machine compats.
|
||||
+ *
|
||||
+ * virtio-net-pci variant romfiles are not needed because edk2 does
|
||||
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
|
||||
+ * on rhel/aarch64.
|
||||
+ */
|
||||
+GlobalProperty arm_rhel_compat[] = {
|
||||
+ {"virtio-net-pci", "romfile", "" },
|
||||
+ {"virtio-net-pci-transitional", "romfile", "" },
|
||||
+ {"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+};
|
||||
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
+
|
||||
+/*
|
||||
+ * This cannot be called from the rhel_virt_class_init() because
|
||||
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
|
||||
+ * only is called on virt-rhelm.n.s non abstract class init.
|
||||
+ */
|
||||
+static void arm_rhel_compat_set(MachineClass *mc)
|
||||
+{
|
||||
+ compat_props_add(mc->compat_props, arm_rhel_compat,
|
||||
+ arm_rhel_compat_len);
|
||||
+}
|
||||
+
|
||||
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
|
||||
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
{ \
|
||||
MachineClass *mc = MACHINE_CLASS(oc); \
|
||||
+ arm_rhel_compat_set(mc); \
|
||||
rhel##m##n##s##_virt_options(mc); \
|
||||
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
|
||||
if (latest) { \
|
||||
@@ -139,19 +167,6 @@
|
||||
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
|
||||
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
|
||||
|
||||
-/* This variable is for changes to properties that are RHEL specific,
|
||||
- * different to the current upstream and to be applied to the latest
|
||||
- * machine type.
|
||||
- */
|
||||
-GlobalProperty arm_rhel_compat[] = {
|
||||
- {
|
||||
- .driver = "virtio-net-pci",
|
||||
- .property = "romfile",
|
||||
- .value = "",
|
||||
- },
|
||||
-};
|
||||
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
-
|
||||
/* Number of external interrupt lines to configure the GIC with */
|
||||
#define NUM_IRQS 256
|
||||
|
||||
@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
{
|
||||
rhel940_virt_options(mc);
|
||||
|
||||
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc)
|
||||
rhel920_virt_options(mc);
|
||||
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
|
||||
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
|
||||
|
||||
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
|
||||
vmc->no_tcg_lpa2 = true;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,59 @@
|
||||
From e3360c415f7de923d27c3167260a93cb679afabe Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Mon, 6 May 2024 15:09:43 +0200
|
||||
Subject: [PATCH 1/2] hw/arm/virt: Fix spurious call to arm_virt_compat_set()
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 238: hw/arm/virt: Fix spurious call to arm_virt_compat_set()
|
||||
RH-Jira: RHEL-34945
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
||||
RH-Commit: [1/1] a858a3e1dff12b28e14f7e4bd2b896a9f06eacbb (eauger1/centos-qemu-kvm)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-34945
|
||||
Status: RHEL-only
|
||||
|
||||
Downstream, we apply arm_rhel_compat in place of arm_virt_compat.
|
||||
This is done though arm_rhel_compat_set() transparently called in
|
||||
DEFINE_RHEL_MACHINE_LATEST(). So there is no need to call
|
||||
arm_virt_compat_set() in rhel_machine_class_init(). Besides
|
||||
this triggers a "GLib: g_ptr_array_add: assertion 'rarray' failed"
|
||||
warning.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index f1af9495c6..3f0496cdb9 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -85,6 +85,7 @@
|
||||
#include "hw/char/pl011.h"
|
||||
#include "qemu/guest-random.h"
|
||||
|
||||
+#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
static GlobalProperty arm_virt_compat[] = {
|
||||
{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
|
||||
};
|
||||
@@ -101,7 +102,6 @@ static void arm_virt_compat_set(MachineClass *mc)
|
||||
arm_virt_compat_len);
|
||||
}
|
||||
|
||||
-#if 0 /* Disabled for Red Hat Enterprise Linux */
|
||||
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
|
||||
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
@@ -3536,7 +3536,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
MachineClass *mc = MACHINE_CLASS(oc);
|
||||
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
|
||||
- arm_virt_compat_set(mc);
|
||||
|
||||
mc->family = "virt-rhel-Z";
|
||||
mc->init = machvirt_init;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001
|
||||
From: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Mon, 12 Feb 2024 10:37:54 +0100
|
||||
Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
|
||||
RH-Author: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types
|
||||
RH-Jira: RHEL-24988
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-24988
|
||||
Upstream: RHEL only
|
||||
|
||||
We do not plan to support any machine types prior to 9.4.0; leave them
|
||||
in, but mark as deprecated.
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index 60f117f0d2..943c563391 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc)
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len);
|
||||
compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
|
||||
+
|
||||
+ /* RHEL 9.4 is the first supported release */
|
||||
+ mc->deprecation_reason =
|
||||
+ "machine types for versions prior to 9.4 are deprecated";
|
||||
}
|
||||
DEFINE_RHEL_MACHINE(9, 2, 0)
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,41 +0,0 @@
|
||||
From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 16:44:20 +0800
|
||||
Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm)
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/i386/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
|
||||
index 55850791df..a1846be6f7 100644
|
||||
--- a/hw/i386/Kconfig
|
||||
+++ b/hw/i386/Kconfig
|
||||
@@ -95,6 +95,7 @@ config Q35
|
||||
imply E1000E_PCI_EXPRESS
|
||||
imply VMPORT
|
||||
imply VMMOUSE
|
||||
+ imply IOMMUFD
|
||||
select PC_PCI
|
||||
select PC_ACPI
|
||||
select PCI_EXPRESS_Q35
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,73 @@
|
||||
From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:40 -0500
|
||||
Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using
|
||||
guest_memfd
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
When guest_memfd is enabled, the BIOS is generally part of the initial
|
||||
encrypted guest image and will be accessed as private guest memory. Add
|
||||
the necessary changes to set up the associated RAM region with a
|
||||
guest_memfd backend to allow for this.
|
||||
|
||||
Current support centers around using -bios to load the BIOS data.
|
||||
Support for loading the BIOS via pflash requires additional enablement
|
||||
since those interfaces rely on the use of ROM memory regions which make
|
||||
use of the KVM_MEM_READONLY memslot flag, which is not supported for
|
||||
guest_memfd-backed memslots.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86-common.c | 17 ++++++++++++-----
|
||||
1 file changed, 12 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
|
||||
index 35fe6eabea..6cbb76c25c 100644
|
||||
--- a/hw/i386/x86-common.c
|
||||
+++ b/hw/i386/x86-common.c
|
||||
@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
|
||||
- &error_fatal);
|
||||
+ if (machine_require_guest_memfd(MACHINE(x86ms))) {
|
||||
+ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
|
||||
+ bios_size, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
|
||||
+ bios_size, &error_fatal);
|
||||
+ }
|
||||
if (sev_enabled()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
}
|
||||
g_free(filename);
|
||||
|
||||
- /* map the last 128KB of the BIOS in ISA space */
|
||||
- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
- !isapc_ram_fw);
|
||||
+ if (!machine_require_guest_memfd(MACHINE(x86ms))) {
|
||||
+ /* map the last 128KB of the BIOS in ISA space */
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
+ !isapc_ram_fw);
|
||||
+ }
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,106 @@
|
||||
From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Tue, 30 Apr 2024 17:06:39 +0200
|
||||
Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take
|
||||
X86MachineState rather than MachineState
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The function creates and leaks two MemoryRegion objects regarding the BIOS which
|
||||
will be moved into X86MachineState in the next steps to avoid the leakage.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240430150643.111976-3-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/microvm.c | 2 +-
|
||||
hw/i386/pc_sysfw.c | 4 ++--
|
||||
hw/i386/x86.c | 4 ++--
|
||||
include/hw/i386/x86.h | 2 +-
|
||||
4 files changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
|
||||
index 61a772dfe6..fec63cacfa 100644
|
||||
--- a/hw/i386/microvm.c
|
||||
+++ b/hw/i386/microvm.c
|
||||
@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
|
||||
default_firmware = x86_machine_is_acpi_enabled(x86ms)
|
||||
? MICROVM_BIOS_FILENAME
|
||||
: MICROVM_QBOOT_FILENAME;
|
||||
- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true);
|
||||
+ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true);
|
||||
}
|
||||
|
||||
static void microvm_memory_init(MicrovmMachineState *mms)
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 3efabbbab2..ef7dea9798 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
|
||||
|
||||
if (!pcmc->pci_enabled) {
|
||||
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true);
|
||||
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
|
||||
if (!pflash_blk[0]) {
|
||||
/* Machine property pflash0 not set, use ROM mode */
|
||||
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false);
|
||||
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
|
||||
} else {
|
||||
if (kvm_enabled() && !kvm_readonly_mem_enabled()) {
|
||||
/*
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 2a4f3ee285..6d3c72f124 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
nb_option_roms++;
|
||||
}
|
||||
|
||||
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw)
|
||||
{
|
||||
const char *bios_name;
|
||||
@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
ssize_t ret;
|
||||
|
||||
/* BIOS load */
|
||||
- bios_name = ms->firmware ?: default_firmware;
|
||||
+ bios_name = MACHINE(x86ms)->firmware ?: default_firmware;
|
||||
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
|
||||
if (filename) {
|
||||
bios_size = get_image_size(filename);
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index 4dc30dcb4d..cb07618d19 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
|
||||
DeviceState *dev, Error **errp);
|
||||
|
||||
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw);
|
||||
|
||||
void x86_load_linux(X86MachineState *x86ms,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,51 @@
|
||||
From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Date: Wed, 3 Apr 2024 10:59:53 -0400
|
||||
Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is
|
||||
not disabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system
|
||||
also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC
|
||||
is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to
|
||||
be cleared. The PIC probe should then print:
|
||||
|
||||
[ 0.155970] Using NULL legacy PIC
|
||||
|
||||
However, no such log printed in guest kernel unless PCAT_COMPAT is
|
||||
cleared.
|
||||
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/acpi-common.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c
|
||||
index 20f19269da..0cc2919bb8 100644
|
||||
--- a/hw/i386/acpi-common.c
|
||||
+++ b/hw/i386/acpi-common.c
|
||||
@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
acpi_table_begin(&table, table_data);
|
||||
/* Local APIC Address */
|
||||
build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4);
|
||||
- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */
|
||||
+ /* Flags. bit 0: PCAT_COMPAT */
|
||||
+ build_append_int_noprefix(table_data,
|
||||
+ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4);
|
||||
|
||||
for (i = 0; i < apic_ids->len; i++) {
|
||||
pc_madt_cpu_entry(i, apic_ids, table_data, false);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,186 +0,0 @@
|
||||
From ea2e2368dcf4140be47288472f2c2a094358e0c7 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Thu, 8 Feb 2024 23:03:45 +0100
|
||||
Subject: [PATCH 03/20] hw/i386/pc: Defer smbios_set_defaults() to machine_done
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS
|
||||
RH-Jira: RHEL-21705
|
||||
RH-Acked-by: MST <mst@redhat.com>
|
||||
RH-Acked-by: Ani Sinha <None>
|
||||
RH-Commit: [1/18] 9d4c1d1a910fec7d310429d6fc0b10c798932db7
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-21705
|
||||
|
||||
commit: a0204a5ed091dfe79aced7ec8f3ce1931fd25816
|
||||
Author: Bernhard Beschow <shentey@gmail.com>
|
||||
|
||||
Handling most of smbios data generation in the machine_done notifier is similar
|
||||
to how the ARM virt machine handles it which also calls smbios_set_defaults()
|
||||
there. The result is that all pc machines are freed from explicitly worrying
|
||||
about smbios setup.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240208220349.4948-6-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
|
||||
Conflicts: hw/i386/pc_q35.c, hw/i386/pc_piix.c
|
||||
due to missing 4d3457fef9 (w/i386/pc: Merge pc_guest_info_init() into pc_machine_initfn())
|
||||
and different signature of smbios_set_defaults() downstream
|
||||
Fixup: hw/i386/fw_cfg.c to account for downstream changes smbios_set_defaults()
|
||||
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
---
|
||||
hw/i386/fw_cfg.c | 14 +++++++++++++-
|
||||
hw/i386/fw_cfg.h | 3 ++-
|
||||
hw/i386/pc.c | 2 +-
|
||||
hw/i386/pc_piix.c | 12 ------------
|
||||
hw/i386/pc_q35.c | 11 -----------
|
||||
include/hw/i386/pc.h | 1 -
|
||||
6 files changed, 16 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
|
||||
index 7362daa45a..6a5466faf0 100644
|
||||
--- a/hw/i386/fw_cfg.c
|
||||
+++ b/hw/i386/fw_cfg.c
|
||||
@@ -48,15 +48,27 @@ const char *fw_cfg_arch_key_name(uint16_t key)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg)
|
||||
+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg)
|
||||
{
|
||||
#ifdef CONFIG_SMBIOS
|
||||
uint8_t *smbios_tables, *smbios_anchor;
|
||||
size_t smbios_tables_len, smbios_anchor_len;
|
||||
struct smbios_phys_mem_area *mem_array;
|
||||
unsigned i, array_count;
|
||||
+ MachineState *ms = MACHINE(pcms);
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
|
||||
+ MachineClass *mc = MACHINE_GET_CLASS(pcms);
|
||||
X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
|
||||
|
||||
+ if (pcmc->smbios_defaults) {
|
||||
+ /* These values are guest ABI, do not change */
|
||||
+ smbios_set_defaults("QEMU", mc->desc, mc->name,
|
||||
+ pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded,
|
||||
+ pcmc->smbios_stream_product,
|
||||
+ pcmc->smbios_stream_version,
|
||||
+ pcms->smbios_entry_point_type);
|
||||
+ }
|
||||
+
|
||||
/* tell smbios about cpuid version and features */
|
||||
smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
|
||||
index 86ca7c1c0c..1e1de6b4a3 100644
|
||||
--- a/hw/i386/fw_cfg.h
|
||||
+++ b/hw/i386/fw_cfg.h
|
||||
@@ -10,6 +10,7 @@
|
||||
#define HW_I386_FW_CFG_H
|
||||
|
||||
#include "hw/boards.h"
|
||||
+#include "hw/i386/pc.h"
|
||||
#include "hw/nvram/fw_cfg.h"
|
||||
|
||||
#define FW_CFG_IO_BASE 0x510
|
||||
@@ -22,7 +23,7 @@
|
||||
FWCfgState *fw_cfg_arch_create(MachineState *ms,
|
||||
uint16_t boot_cpus,
|
||||
uint16_t apic_id_limit);
|
||||
-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
|
||||
+void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg);
|
||||
void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
|
||||
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index a1faa9e92c..16de2a59e8 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data)
|
||||
|
||||
acpi_setup();
|
||||
if (x86ms->fw_cfg) {
|
||||
- fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg);
|
||||
+ fw_cfg_build_smbios(pcms, x86ms->fw_cfg);
|
||||
fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
|
||||
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
|
||||
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index 09d02cc91f..7344b35cf1 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "hw/rtc/mc146818rtc.h"
|
||||
#include "hw/southbridge/piix.h"
|
||||
#include "hw/display/ramfb.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/pci/pci.h"
|
||||
#include "hw/pci/pci_ids.h"
|
||||
#include "hw/usb.h"
|
||||
@@ -233,17 +232,6 @@ static void pc_init1(MachineState *machine,
|
||||
|
||||
pc_guest_info_init(pcms);
|
||||
|
||||
- if (pcmc->smbios_defaults) {
|
||||
- MachineClass *mc = MACHINE_GET_CLASS(machine);
|
||||
- /* These values are guest ABI, do not change */
|
||||
- smbios_set_defaults("Red Hat", "KVM",
|
||||
- mc->desc, pcmc->smbios_legacy_mode,
|
||||
- pcmc->smbios_uuid_encoded,
|
||||
- pcmc->smbios_stream_product,
|
||||
- pcmc->smbios_stream_version,
|
||||
- pcms->smbios_entry_point_type);
|
||||
- }
|
||||
-
|
||||
/* allocate ram and load rom/bios */
|
||||
if (!xen_enabled()) {
|
||||
pc_memory_init(pcms, system_memory, rom_memory, hole64_size);
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index c6967e1846..9a22ff5dd6 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -45,7 +45,6 @@
|
||||
#include "hw/i386/amd_iommu.h"
|
||||
#include "hw/i386/intel_iommu.h"
|
||||
#include "hw/display/ramfb.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/ide/pci.h"
|
||||
#include "hw/ide/ahci.h"
|
||||
#include "hw/intc/ioapic.h"
|
||||
@@ -201,16 +200,6 @@ static void pc_q35_init(MachineState *machine)
|
||||
|
||||
pc_guest_info_init(pcms);
|
||||
|
||||
- if (pcmc->smbios_defaults) {
|
||||
- /* These values are guest ABI, do not change */
|
||||
- smbios_set_defaults("Red Hat", "KVM",
|
||||
- mc->desc, pcmc->smbios_legacy_mode,
|
||||
- pcmc->smbios_uuid_encoded,
|
||||
- pcmc->smbios_stream_product,
|
||||
- pcmc->smbios_stream_version,
|
||||
- pcms->smbios_entry_point_type);
|
||||
- }
|
||||
-
|
||||
/* create pci host bus */
|
||||
phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE));
|
||||
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 37644ede7e..c286c10bc3 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -12,7 +12,6 @@
|
||||
#include "hw/hotplug.h"
|
||||
#include "qom/object.h"
|
||||
#include "hw/i386/sgx-epc.h"
|
||||
-#include "hw/firmware/smbios.h"
|
||||
#include "hw/cxl/cxl.h"
|
||||
|
||||
#define HPET_INTCAP "hpet-intcap"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,164 @@
|
||||
From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:07 +0200
|
||||
Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios
|
||||
region
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm)
|
||||
|
||||
In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped
|
||||
to the top of the 4G memory boundary. Do the same in the -pflash case, but only
|
||||
for new machine versions for migration compatibility. This establishes common
|
||||
behavior and makes pflash commands work in the "isa-bios" region which some
|
||||
real-world legacy bioses rely on.
|
||||
|
||||
Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash
|
||||
case will now also point to encrypted memory, just like it already does in the
|
||||
-bios case.
|
||||
|
||||
When running `info mtree` before and after this commit with
|
||||
`qemu-system-x86_64 -S -drive \
|
||||
if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running
|
||||
`diff -u before.mtree after.mtree` results in the following changes in the
|
||||
memory tree:
|
||||
|
||||
| --- before.mtree
|
||||
| +++ after.mtree
|
||||
| @@ -71,7 +71,7 @@
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
|
||||
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
|
||||
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
|
||||
| @@ -108,7 +108,7 @@
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
|
||||
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
|
||||
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
|
||||
| @@ -131,11 +131,14 @@
|
||||
| memory-region: pc.ram
|
||||
| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram
|
||||
|
|
||||
| +memory-region: system.flash0
|
||||
| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
|
||||
| +
|
||||
| memory-region: pci
|
||||
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
|
||||
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
|
||||
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
|
||||
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
||||
|
|
||||
| memory-region: smram
|
||||
| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff
|
||||
|
||||
Note that in both cases the "system" memory region contains the entry
|
||||
|
||||
00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
|
||||
|
||||
but the "system.flash0" memory region only appears standalone when "isa-bios" is
|
||||
an alias.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-7-shentey@gmail.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 1 +
|
||||
hw/i386/pc_piix.c | 1 +
|
||||
hw/i386/pc_q35.c | 1 +
|
||||
hw/i386/pc_sysfw.c | 8 +++++++-
|
||||
include/hw/i386/pc.h | 1 +
|
||||
5 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 1a34bc4522..660a59c63b 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
pcmc->has_reserved_memory = true;
|
||||
pcmc->enforce_aligned_dimm = true;
|
||||
pcmc->enforce_amd_1tb_hole = true;
|
||||
+ pcmc->isa_bios_alias = true;
|
||||
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
|
||||
* to be used at the moment, 32K should be enough for a while. */
|
||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
|
||||
index bef3e8b73e..dbb7f2ed17 100644
|
||||
--- a/hw/i386/pc_piix.c
|
||||
+++ b/hw/i386/pc_piix.c
|
||||
@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
|
||||
m->alias = "pc";
|
||||
m->is_default = 1;
|
||||
m->smp_props.prefer_sockets = true;
|
||||
+ pcmc->isa_bios_alias = false;
|
||||
}
|
||||
|
||||
static void pc_init_rhel760(MachineState *machine)
|
||||
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
|
||||
index dedc86eec9..f9900ad798 100644
|
||||
--- a/hw/i386/pc_q35.c
|
||||
+++ b/hw/i386/pc_q35.c
|
||||
@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
|
||||
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
|
||||
pcmc->smbios_stream_product = "RHEL";
|
||||
pcmc->smbios_stream_version = "9.4.0";
|
||||
+ pcmc->isa_bios_alias = false;
|
||||
|
||||
compat_props_add(m->compat_props, pc_rhel_9_5_compat,
|
||||
pc_rhel_9_5_compat_len);
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 82d37cb376..ac88ad4eb9 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
MemoryRegion *rom_memory)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(pcms);
|
||||
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
|
||||
hwaddr total_size = 0;
|
||||
int i;
|
||||
BlockBackend *blk;
|
||||
@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ if (pcmc->isa_bios_alias) {
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
|
||||
+ true);
|
||||
+ } else {
|
||||
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ }
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 467e7fb52f..3f53ec73ac 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -122,6 +122,7 @@ struct PCMachineClass {
|
||||
bool enforce_aligned_dimm;
|
||||
bool broken_reserved_end;
|
||||
bool enforce_amd_1tb_hole;
|
||||
+ bool isa_bios_alias;
|
||||
|
||||
/* generate legacy CPU hotplug AML */
|
||||
bool legacy_cpu_hotplug;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,53 @@
|
||||
From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Mon, 22 Apr 2024 22:06:22 +0200
|
||||
Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from
|
||||
pc_isa_bios_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240422200625.2768-2-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index ef7dea9798..59c7a81692 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -41,8 +41,7 @@
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
- MemoryRegion *flash_mem,
|
||||
- int ram_size)
|
||||
+ MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
MemoryRegion *isa_bios;
|
||||
@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(rom_memory, flash_mem, size);
|
||||
+ pc_isa_bios_init(rom_memory, flash_mem);
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,158 @@
|
||||
From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:30 -0500
|
||||
Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from
|
||||
OVMF header
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
A recent version of OVMF expanded the reset vector GUID list to add
|
||||
SEV-specific metadata GUID. The SEV metadata describes the reserved
|
||||
memory regions such as the secrets and CPUID page used during the SEV-SNP
|
||||
guest launch.
|
||||
|
||||
The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV
|
||||
metadata pointer from the OVMF GUID list.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 4 ++++
|
||||
include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++
|
||||
target/i386/sev-sysemu-stub.c | 4 ++++
|
||||
target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++
|
||||
target/i386/sev.h | 2 ++
|
||||
5 files changed, 68 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index ac88ad4eb9..9b8671c441 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
pc_system_parse_ovmf_flash(ptr, size);
|
||||
|
||||
if (sev_enabled()) {
|
||||
+
|
||||
+ /* Copy the SEV metadata table (if it exists) */
|
||||
+ pc_system_parse_sev_metadata(ptr, size);
|
||||
+
|
||||
ret = sev_es_save_reset_vector(ptr, size);
|
||||
if (ret) {
|
||||
error_report("failed to locate and/or save reset vector");
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 3f53ec73ac..94b49310f5 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
|
||||
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
|
||||
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
|
||||
|
||||
+typedef enum {
|
||||
+ SEV_DESC_TYPE_UNDEF,
|
||||
+ /* The section contains the region that must be validated by the VMM. */
|
||||
+ SEV_DESC_TYPE_SNP_SEC_MEM,
|
||||
+ /* The section contains the SNP secrets page */
|
||||
+ SEV_DESC_TYPE_SNP_SECRETS,
|
||||
+ /* The section contains address that can be used as a CPUID page */
|
||||
+ SEV_DESC_TYPE_CPUID,
|
||||
+
|
||||
+} ovmf_sev_metadata_desc_type;
|
||||
+
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc {
|
||||
+ uint32_t base;
|
||||
+ uint32_t len;
|
||||
+ ovmf_sev_metadata_desc_type type;
|
||||
+} OvmfSevMetadataDesc;
|
||||
+
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadata {
|
||||
+ uint8_t signature[4];
|
||||
+ uint32_t len;
|
||||
+ uint32_t version;
|
||||
+ uint32_t num_desc;
|
||||
+ OvmfSevMetadataDesc descs[];
|
||||
+} OvmfSevMetadata;
|
||||
+
|
||||
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
|
||||
|
||||
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
|
||||
MemoryRegion *pci_address_space);
|
||||
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
|
||||
index 96e1c15cc3..fc1c57c411 100644
|
||||
--- a/target/i386/sev-sysemu-stub.c
|
||||
+++ b/target/i386/sev-sysemu-stub.c
|
||||
@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
|
||||
{
|
||||
monitor_printf(mon, "SEV is not available in this QEMU\n");
|
||||
}
|
||||
+
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
|
||||
+{
|
||||
+}
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index e84e4395a5..17281bb2c7 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp)
|
||||
return sev_get_capabilities(errp);
|
||||
}
|
||||
|
||||
+static OvmfSevMetadata *ovmf_sev_metadata_table;
|
||||
+
|
||||
+#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc"
|
||||
+typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset {
|
||||
+ uint32_t offset;
|
||||
+} OvmfSevMetadataOffset;
|
||||
+
|
||||
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void)
|
||||
+{
|
||||
+ return ovmf_sev_metadata_table;
|
||||
+}
|
||||
+
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
|
||||
+{
|
||||
+ OvmfSevMetadata *metadata;
|
||||
+ OvmfSevMetadataOffset *data;
|
||||
+
|
||||
+ if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data,
|
||||
+ NULL)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset);
|
||||
+ if (memcmp(metadata->signature, "ASEV", 4) != 0 ||
|
||||
+ metadata->len < sizeof(OvmfSevMetadata) ||
|
||||
+ metadata->len > flash_size - data->offset) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len);
|
||||
+}
|
||||
+
|
||||
static SevAttestationReport *sev_get_attestation_report(const char *mnonce,
|
||||
Error **errp)
|
||||
{
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index 5dc4767b1e..cc12824dd6 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
|
||||
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
|
||||
void sev_es_set_reset_vector(CPUState *cpu);
|
||||
|
||||
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size);
|
||||
+
|
||||
#endif
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,165 @@
|
||||
From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:36 -0500
|
||||
Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP
|
||||
is enabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm)
|
||||
|
||||
As with SEV, an SNP guest requires that the BIOS be part of the initial
|
||||
encrypted/measured guest payload. Extend sev_encrypt_flash() to handle
|
||||
the SNP case and plumb through the GPA of the BIOS location since this
|
||||
is needed for SNP.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 12 +++++++-----
|
||||
hw/i386/x86-common.c | 2 +-
|
||||
include/hw/i386/x86.h | 2 +-
|
||||
target/i386/sev-sysemu-stub.c | 2 +-
|
||||
target/i386/sev.c | 5 +++--
|
||||
target/i386/sev.h | 2 +-
|
||||
6 files changed, 14 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 9b8671c441..7cdbafc8d2 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
|
||||
+ hwaddr gpa;
|
||||
+
|
||||
system_flash = pcms->flash[i];
|
||||
blk = pflash_cfi01_get_blk(system_flash);
|
||||
if (!blk) {
|
||||
@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
}
|
||||
|
||||
total_size += size;
|
||||
+ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */
|
||||
qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks",
|
||||
size / FLASH_SECTOR_SIZE);
|
||||
sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal);
|
||||
- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0,
|
||||
- 0x100000000ULL - total_size);
|
||||
+ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa);
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
if (sev_enabled()) {
|
||||
flash_ptr = memory_region_get_ram_ptr(flash_mem);
|
||||
flash_size = memory_region_size(flash_mem);
|
||||
- x86_firmware_configure(flash_ptr, flash_size);
|
||||
+ x86_firmware_configure(gpa, flash_ptr, flash_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
|
||||
pc_system_flash_cleanup_unused(pcms);
|
||||
}
|
||||
|
||||
-void x86_firmware_configure(void *ptr, int size)
|
||||
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
- sev_encrypt_flash(ptr, size, &error_fatal);
|
||||
+ sev_encrypt_flash(gpa, ptr, size, &error_fatal);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
|
||||
index 67b03c913a..35fe6eabea 100644
|
||||
--- a/hw/i386/x86-common.c
|
||||
+++ b/hw/i386/x86-common.c
|
||||
@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
*/
|
||||
void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
- x86_firmware_configure(ptr, bios_size);
|
||||
+ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size);
|
||||
} else {
|
||||
memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index b006f16b8d..d43cb3908e 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent);
|
||||
DeviceState *ioapic_init_secondary(GSIState *gsi_state);
|
||||
|
||||
/* pc_sysfw.c */
|
||||
-void x86_firmware_configure(void *ptr, int size);
|
||||
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size);
|
||||
|
||||
#endif
|
||||
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
|
||||
index fc1c57c411..d5bf886e79 100644
|
||||
--- a/target/i386/sev-sysemu-stub.c
|
||||
+++ b/target/i386/sev-sysemu-stub.c
|
||||
@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret,
|
||||
error_setg(errp, "SEV is not available in this QEMU");
|
||||
}
|
||||
|
||||
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
g_assert_not_reached();
|
||||
}
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 06401f0526..7b5c4b4874 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1484,7 +1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
int
|
||||
-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
+sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
|
||||
|
||||
@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
/* zero the excess data so the measurement can be reliably calculated */
|
||||
memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
|
||||
|
||||
- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
|
||||
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
+ sizeof(*padded_ht), errp) < 0) {
|
||||
ret = false;
|
||||
}
|
||||
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index cc12824dd6..858005a119 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void);
|
||||
uint32_t sev_get_reduced_phys_bits(void);
|
||||
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
|
||||
|
||||
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
|
||||
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
|
||||
int sev_inject_launch_secret(const char *hdr, const char *secret,
|
||||
uint64_t gpa, Error **errp);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,123 @@
|
||||
From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:39 -0500
|
||||
Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Current SNP guest kernels will attempt to access these regions with
|
||||
with C-bit set, so guest_memfd is needed to handle that. Otherwise,
|
||||
kvm_convert_memory() will fail when the guest kernel tries to access it
|
||||
and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
|
||||
to private.
|
||||
|
||||
Whether guests should actually try to access ROM regions in this way (or
|
||||
need to deal with legacy ROM regions at all), is a separate issue to be
|
||||
addressed on kernel side, but current SNP guest kernels will exhibit
|
||||
this behavior and so this handling is needed to allow QEMU to continue
|
||||
running existing SNP guest kernels.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
[pankaj: Added sev_snp_enabled() check]
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 14 ++++++++++----
|
||||
hw/i386/pc_sysfw.c | 19 +++++++++++++------
|
||||
2 files changed, 23 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 0aca0cc79e..b25d075b59 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -62,6 +62,7 @@
|
||||
#include "hw/mem/memory-device.h"
|
||||
#include "e820_memory_layout.h"
|
||||
#include "trace.h"
|
||||
+#include "sev.h"
|
||||
#include CONFIG_DEVICES
|
||||
|
||||
#ifdef CONFIG_XEN_EMU
|
||||
@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms,
|
||||
pc_system_firmware_init(pcms, rom_memory);
|
||||
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
- &error_fatal);
|
||||
- if (pcmc->pci_enabled) {
|
||||
- memory_region_set_readonly(option_rom_mr, true);
|
||||
+ if (machine_require_guest_memfd(machine)) {
|
||||
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
|
||||
+ PC_ROM_SIZE, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
+ &error_fatal);
|
||||
+ if (pcmc->pci_enabled) {
|
||||
+ memory_region_set_readonly(option_rom_mr, true);
|
||||
+ }
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 7cdbafc8d2..ef80281d28 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -40,8 +40,8 @@
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
- MemoryRegion *flash_mem)
|
||||
+static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios,
|
||||
+ MemoryRegion *rom_memory, MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
uint64_t flash_size;
|
||||
@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(flash_size, 128 * KiB);
|
||||
- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
- &error_fatal);
|
||||
+ if (machine_require_guest_memfd(MACHINE(pcms))) {
|
||||
+ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
|
||||
+ isa_bios_size, &error_fatal);
|
||||
+ } else {
|
||||
+ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
+ &error_fatal);
|
||||
+ }
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
|
||||
isa_bios_size);
|
||||
|
||||
- memory_region_set_readonly(isa_bios, true);
|
||||
+ if (!machine_require_guest_memfd(current_machine)) {
|
||||
+ memory_region_set_readonly(isa_bios, true);
|
||||
+ }
|
||||
}
|
||||
|
||||
static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
|
||||
@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
|
||||
true);
|
||||
} else {
|
||||
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
+ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem);
|
||||
}
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,58 @@
|
||||
From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Apr 2024 18:07:43 -0500
|
||||
Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older
|
||||
machine types
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for
|
||||
creating SEV/SEV-ES going forward. However, this API results in guest
|
||||
measurement changes which are generally not expected for users of these
|
||||
older guest types and can cause disruption if they switch to a newer
|
||||
QEMU/kernel version. Avoid this by continuing to use the older
|
||||
KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Message-ID: <20240409230743.962513-4-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 1 +
|
||||
target/i386/sev.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index b9fde3cec1..1a34bc4522 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
GlobalProperty pc_rhel_9_5_compat[] = {
|
||||
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
|
||||
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
|
||||
+ { "sev-guest", "legacy-vm-type", "true" },
|
||||
};
|
||||
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index f4ee317cb0..d30b68c11e 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj)
|
||||
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
|
||||
&sev->reduced_phys_bits,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
+ object_apply_compat_props(obj);
|
||||
}
|
||||
|
||||
/* sev guest info */
|
||||
--
|
||||
2.39.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,133 @@
|
||||
From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:04 +0200
|
||||
Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an
|
||||
"isa_bios" attribute to X86MachineState.
|
||||
|
||||
Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-4-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc_sysfw.c | 7 +++----
|
||||
hw/i386/x86.c | 9 ++++-----
|
||||
include/hw/i386/x86.h | 7 +++++++
|
||||
3 files changed, 14 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
|
||||
index 59c7a81692..82d37cb376 100644
|
||||
--- a/hw/i386/pc_sysfw.c
|
||||
+++ b/hw/i386/pc_sysfw.c
|
||||
@@ -40,11 +40,10 @@
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
-static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
+static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
|
||||
MemoryRegion *flash_mem)
|
||||
{
|
||||
int isa_bios_size;
|
||||
- MemoryRegion *isa_bios;
|
||||
uint64_t flash_size;
|
||||
void *flash_ptr, *isa_bios_ptr;
|
||||
|
||||
@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(flash_size, 128 * KiB);
|
||||
- isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
|
||||
&error_fatal);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms)
|
||||
static void pc_system_flash_map(PCMachineState *pcms,
|
||||
MemoryRegion *rom_memory)
|
||||
{
|
||||
+ X86MachineState *x86ms = X86_MACHINE(pcms);
|
||||
hwaddr total_size = 0;
|
||||
int i;
|
||||
BlockBackend *blk;
|
||||
@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
|
||||
|
||||
if (i == 0) {
|
||||
flash_mem = pflash_cfi01_get_memory(system_flash);
|
||||
- pc_isa_bios_init(rom_memory, flash_mem);
|
||||
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
|
||||
|
||||
/* Encrypt the pflash boot ROM */
|
||||
if (sev_enabled()) {
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 6d3c72f124..457e8a34a5 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- MemoryRegion *bios, *isa_bios;
|
||||
+ MemoryRegion *bios;
|
||||
int bios_size, isa_bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
- isa_bios,
|
||||
+ &x86ms->isa_bios,
|
||||
1);
|
||||
- memory_region_set_readonly(isa_bios, !isapc_ram_fw);
|
||||
+ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index cb07618d19..a07de79167 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -18,6 +18,7 @@
|
||||
#define HW_I386_X86_H
|
||||
|
||||
#include "exec/hwaddr.h"
|
||||
+#include "exec/memory.h"
|
||||
|
||||
#include "hw/boards.h"
|
||||
#include "hw/intc/ioapic.h"
|
||||
@@ -52,6 +53,12 @@ struct X86MachineState {
|
||||
GMappedFile *initrd_mapped_file;
|
||||
HotplugHandler *acpi_dev;
|
||||
|
||||
+ /*
|
||||
+ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
|
||||
+ * boundary.
|
||||
+ */
|
||||
+ MemoryRegion isa_bios;
|
||||
+
|
||||
/* RAM information (sizes, addresses, configuration): */
|
||||
ram_addr_t below_4g_mem_size, above_4g_mem_size;
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,105 @@
|
||||
From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:05 +0200
|
||||
Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to
|
||||
X86MachineState. Note that it is only used in the -bios case.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-5-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 13 ++++++-------
|
||||
include/hw/i386/x86.h | 6 ++++++
|
||||
2 files changed, 12 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 457e8a34a5..29167de97d 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- MemoryRegion *bios;
|
||||
int bios_size, isa_bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
- bios = g_malloc(sizeof(*bios));
|
||||
- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
|
||||
+ &error_fatal);
|
||||
if (sev_enabled()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
* the firmware as rom to properly re-initialize on reset.
|
||||
* Just go for a straight file load instead.
|
||||
*/
|
||||
- void *ptr = memory_region_get_ram_ptr(bios);
|
||||
+ void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
x86_firmware_configure(ptr, bios_size);
|
||||
} else {
|
||||
- memory_region_set_readonly(bios, !isapc_ram_fw);
|
||||
+ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
if (ret != 0) {
|
||||
goto bios_error;
|
||||
@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
|
||||
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
(uint32_t)(-bios_size),
|
||||
- bios);
|
||||
+ &x86ms->bios);
|
||||
return;
|
||||
|
||||
bios_error:
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index a07de79167..55c6809ae0 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -53,6 +53,12 @@ struct X86MachineState {
|
||||
GMappedFile *initrd_mapped_file;
|
||||
HotplugHandler *acpi_dev;
|
||||
|
||||
+ /*
|
||||
+ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used
|
||||
+ * in the ROM (-bios) case.
|
||||
+ */
|
||||
+ MemoryRegion bios;
|
||||
+
|
||||
/*
|
||||
* Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
|
||||
* boundary.
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,69 @@
|
||||
From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Tue, 30 Apr 2024 17:06:38 +0200
|
||||
Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in
|
||||
x86_bios_rom_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Given that memory_region_set_readonly() is a no-op when the readonlyness is
|
||||
already as requested it is possible to simplify the pattern
|
||||
|
||||
if (condition) {
|
||||
foo(true);
|
||||
}
|
||||
|
||||
to
|
||||
|
||||
foo(condition);
|
||||
|
||||
which is shorter and allows to see the invariant of the code more easily.
|
||||
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-ID: <20240430150643.111976-2-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 8 ++------
|
||||
1 file changed, 2 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 3d5b51e92d..2a4f3ee285 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
load_image_size(filename, ptr, bios_size);
|
||||
x86_firmware_configure(ptr, bios_size);
|
||||
} else {
|
||||
- if (!isapc_ram_fw) {
|
||||
- memory_region_set_readonly(bios, true);
|
||||
- }
|
||||
+ memory_region_set_readonly(bios, !isapc_ram_fw);
|
||||
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
|
||||
if (ret != 0) {
|
||||
goto bios_error;
|
||||
@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
- if (!isapc_ram_fw) {
|
||||
- memory_region_set_readonly(isa_bios, true);
|
||||
- }
|
||||
+ memory_region_set_readonly(isa_bios, !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,98 @@
|
||||
From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001
|
||||
From: Bernhard Beschow <shentey@gmail.com>
|
||||
Date: Wed, 8 May 2024 19:55:06 +0200
|
||||
Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from
|
||||
x86_bios_rom_init()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The function is inspired by pc_isa_bios_init() and should eventually replace it.
|
||||
Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands
|
||||
to work in the isa-bios region.
|
||||
|
||||
While at it convert the magic number 0x100000 (== 1MiB) to increase readability.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
|
||||
Message-ID: <20240508175507.22270-6-shentey@gmail.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/x86.c | 25 ++++++++++++++++---------
|
||||
include/hw/i386/x86.h | 2 ++
|
||||
2 files changed, 18 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index 29167de97d..c61f4ebfa6 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
nb_option_roms++;
|
||||
}
|
||||
|
||||
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
|
||||
+ MemoryRegion *bios, bool read_only)
|
||||
+{
|
||||
+ uint64_t bios_size = memory_region_size(bios);
|
||||
+ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
+
|
||||
+ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
+ bios_size - isa_bios_size, isa_bios_size);
|
||||
+ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size,
|
||||
+ isa_bios, 1);
|
||||
+ memory_region_set_readonly(isa_bios, read_only);
|
||||
+}
|
||||
+
|
||||
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw)
|
||||
{
|
||||
const char *bios_name;
|
||||
char *filename;
|
||||
- int bios_size, isa_bios_size;
|
||||
+ int bios_size;
|
||||
ssize_t ret;
|
||||
|
||||
/* BIOS load */
|
||||
@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
g_free(filename);
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
- isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
|
||||
- bios_size - isa_bios_size, isa_bios_size);
|
||||
- memory_region_add_subregion_overlap(rom_memory,
|
||||
- 0x100000 - isa_bios_size,
|
||||
- &x86ms->isa_bios,
|
||||
- 1);
|
||||
- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
|
||||
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
|
||||
+ !isapc_ram_fw);
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
memory_region_add_subregion(rom_memory,
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index 55c6809ae0..d7b7d3f3ce 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
|
||||
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
|
||||
DeviceState *dev, Error **errp);
|
||||
|
||||
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
|
||||
+ MemoryRegion *bios, bool read_only);
|
||||
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
|
||||
MemoryRegion *rom_memory, bool isapc_ram_fw);
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,116 +0,0 @@
|
||||
From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 21 Nov 2023 15:03:55 +0100
|
||||
Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm)
|
||||
|
||||
When the legacy and iommufd backends were introduced, a set of common
|
||||
vfio-pci routines were exported in pci.c for both backends to use :
|
||||
|
||||
vfio_pci_pre_reset
|
||||
vfio_pci_get_pci_hot_reset_info
|
||||
vfio_pci_host_match
|
||||
vfio_pci_post_reset
|
||||
|
||||
This introduced a build failure on PPC when --without-default-devices
|
||||
is use because VFIO is always selected in ppc/Kconfig but VFIO_PCI is
|
||||
not.
|
||||
|
||||
Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the
|
||||
VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with
|
||||
CONFIG_VFIO_PCI.
|
||||
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/ppc/Kconfig | 2 +-
|
||||
hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
|
||||
index 56f0475a8e..44263a58c4 100644
|
||||
--- a/hw/ppc/Kconfig
|
||||
+++ b/hw/ppc/Kconfig
|
||||
@@ -3,11 +3,11 @@ config PSERIES
|
||||
imply PCI_DEVICES
|
||||
imply TEST_DEVICES
|
||||
imply VIRTIO_VGA
|
||||
+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
|
||||
select NVDIMM
|
||||
select DIMM
|
||||
select PCI
|
||||
select SPAPR_VSCSI
|
||||
- select VFIO if LINUX # needed by spapr_pci_vfio.c
|
||||
select XICS
|
||||
select XIVE
|
||||
select MSI_NONBROKEN
|
||||
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
|
||||
index d1d07bec46..76b2a3487b 100644
|
||||
--- a/hw/ppc/spapr_pci_vfio.c
|
||||
+++ b/hw/ppc/spapr_pci_vfio.c
|
||||
@@ -26,10 +26,12 @@
|
||||
#include "hw/pci/pci_device.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
|
||||
|
||||
/*
|
||||
* Interfaces for IBM EEH (Enhanced Error Handling)
|
||||
*/
|
||||
+#ifdef CONFIG_VFIO_PCI
|
||||
static bool vfio_eeh_container_ok(VFIOContainer *container)
|
||||
{
|
||||
/*
|
||||
@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
|
||||
return RTAS_OUT_SUCCESS;
|
||||
}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+void spapr_phb_vfio_reset(DeviceState *qdev)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
|
||||
+ unsigned int addr, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
+{
|
||||
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_VFIO_PCI */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,73 +0,0 @@
|
||||
From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Volker=20R=C3=BCmelin?= <vr_qemu@t-online.de>
|
||||
Date: Fri, 29 Dec 2023 21:38:54 +0100
|
||||
Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||
RH-Jira: RHEL-19302 RHEL-21057
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||
RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm)
|
||||
|
||||
Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
introduced a global VFIODevice list, but forgot to update the list
|
||||
element field name when iterating over the new list. Change the code
|
||||
to use the correct list element field.
|
||||
|
||||
Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061
|
||||
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
|
||||
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||
(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6)
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 0d4d8b8416..0b3352f2a9 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -73,7 +73,7 @@ bool vfio_mig_active(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration_blocker) {
|
||||
return false;
|
||||
}
|
||||
@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void)
|
||||
unsigned int device_num = 0;
|
||||
bool all_support_p2p = true;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->migration) {
|
||||
device_num++;
|
||||
|
||||
@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized) {
|
||||
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
|
||||
}
|
||||
}
|
||||
|
||||
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||
if (vbasedev->dev->realized && vbasedev->needs_reset) {
|
||||
vbasedev->ops->vfio_hot_reset_multi(vbasedev);
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,108 @@
|
||||
From c554f8768a18ceba173aedbd582c1cae43a41e2c Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 18 Jun 2024 14:19:58 +0200
|
||||
Subject: [PATCH 1/2] hw/virtio: Fix the de-initialization of vhost-user
|
||||
devices
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 255: hw/virtio: Fix the de-initialization of vhost-user devices
|
||||
RH-Jira: RHEL-40708
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] c7815a249ec135993f45934cab1c1f2c038b80ea (thuth/qemu-kvm-cs9)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-40708
|
||||
|
||||
The unrealize functions of the various vhost-user devices are
|
||||
calling the corresponding vhost_*_set_status() functions with a
|
||||
status of 0 to shut down the device correctly.
|
||||
|
||||
Now these vhost_*_set_status() functions all follow this scheme:
|
||||
|
||||
bool should_start = virtio_device_should_start(vdev, status);
|
||||
|
||||
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (should_start) {
|
||||
/* ... do the initialization stuff ... */
|
||||
} else {
|
||||
/* ... do the cleanup stuff ... */
|
||||
}
|
||||
|
||||
The problem here is virtio_device_should_start(vdev, 0) currently
|
||||
always returns "true" since it internally only looks at vdev->started
|
||||
instead of looking at the "status" parameter. Thus once the device
|
||||
got started once, virtio_device_should_start() always returns true
|
||||
and thus the vhost_*_set_status() functions return early, without
|
||||
ever doing any clean-up when being called with status == 0. This
|
||||
causes e.g. problems when trying to hot-plug and hot-unplug a vhost
|
||||
user devices multiple times since the de-initialization step is
|
||||
completely skipped during the unplug operation.
|
||||
|
||||
This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
|
||||
vm_running check to virtio_device_started") which replaced
|
||||
|
||||
should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
|
||||
|
||||
with
|
||||
|
||||
should_start = virtio_device_started(vdev, status);
|
||||
|
||||
which later got replaced by virtio_device_should_start(). This blocked
|
||||
the possibility to set should_start to false in case the status flag
|
||||
VIRTIO_CONFIG_S_DRIVER_OK was not set.
|
||||
|
||||
Fix it by adjusting the virtio_device_should_start() function to
|
||||
only consider the status flag instead of vdev->started. Since this
|
||||
function is only used in the various vhost_*_set_status() functions
|
||||
for exactly the same purpose, it should be fine to fix it in this
|
||||
central place there without any risk to change the behavior of other
|
||||
code.
|
||||
|
||||
Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
|
||||
Buglink: https://issues.redhat.com/browse/RHEL-40708
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20240618121958.88673-1-thuth@redhat.com>
|
||||
Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
include/hw/virtio/virtio.h | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
||||
index 7d5ffdc145..2eafad17b8 100644
|
||||
--- a/include/hw/virtio/virtio.h
|
||||
+++ b/include/hw/virtio/virtio.h
|
||||
@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
|
||||
* @vdev - the VirtIO device
|
||||
* @status - the devices status bits
|
||||
*
|
||||
- * This is similar to virtio_device_started() but also encapsulates a
|
||||
- * check on the VM status which would prevent a device starting
|
||||
- * anyway.
|
||||
+ * This is similar to virtio_device_started() but ignores vdev->started
|
||||
+ * and also encapsulates a check on the VM status which would prevent a
|
||||
+ * device from starting anyway.
|
||||
*/
|
||||
static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
|
||||
{
|
||||
@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
|
||||
return false;
|
||||
}
|
||||
|
||||
- return virtio_device_started(vdev, status);
|
||||
+ return status & VIRTIO_CONFIG_S_DRIVER_OK;
|
||||
}
|
||||
|
||||
static inline void virtio_set_started(VirtIODevice *vdev, bool started)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,68 @@
|
||||
From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu, 9 May 2024 19:00:39 +0200
|
||||
Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends
|
||||
on other components
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm)
|
||||
|
||||
fw_cfg.c and vapic.c are currently included unconditionally but
|
||||
depend on other components. vapic.c depends on the local APIC,
|
||||
while fw_cfg.c includes a piece of AML builder code that depends
|
||||
on CONFIG_ACPI.
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||
Message-ID: <20240509170044.190795-9-pbonzini@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/fw_cfg.c | 2 ++
|
||||
hw/i386/meson.build | 2 +-
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
|
||||
index 283c3f4c16..7f97d40616 100644
|
||||
--- a/hw/i386/fw_cfg.c
|
||||
+++ b/hw/i386/fw_cfg.c
|
||||
@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg)
|
||||
fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_ACPI
|
||||
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
|
||||
{
|
||||
/*
|
||||
@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
|
||||
aml_append(dev, aml_name_decl("_CRS", crs));
|
||||
aml_append(scope, dev);
|
||||
}
|
||||
+#endif
|
||||
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
|
||||
index d8b70ef3e9..d9da676038 100644
|
||||
--- a/hw/i386/meson.build
|
||||
+++ b/hw/i386/meson.build
|
||||
@@ -1,12 +1,12 @@
|
||||
i386_ss = ss.source_set()
|
||||
i386_ss.add(files(
|
||||
'fw_cfg.c',
|
||||
- 'vapic.c',
|
||||
'e820_memory_layout.c',
|
||||
'multiboot.c',
|
||||
'x86.c',
|
||||
))
|
||||
|
||||
+i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c'))
|
||||
i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
|
||||
if_false: files('x86-iommu-stub.c'))
|
||||
i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,40 @@
|
||||
From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:23 -0500
|
||||
Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SNP guests will rely on this bit to determine certain feature support.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 489c853b42..13737cd703 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
+ *eax |= sev_snp_enabled() ? 0x10 : 0;
|
||||
*ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
*ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,145 @@
|
||||
From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:42 -0500
|
||||
Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for
|
||||
KVM_HC_MAP_GPA_RANGE
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for
|
||||
private/shared memory attribute updates requested by the guest.
|
||||
Implement handling for that use-case along with some basic
|
||||
infrastructure for enabling specific hypercall events.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++
|
||||
target/i386/kvm/kvm_i386.h | 1 +
|
||||
target/i386/kvm/trace-events | 1 +
|
||||
3 files changed, 57 insertions(+)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 75e75d9772..2935e3931a 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
+#include <linux/kvm_para.h>
|
||||
#include "standard-headers/asm-x86/kvm_para.h"
|
||||
#include "hw/xen/interface/arch-x86/cpuid.h"
|
||||
|
||||
@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms)
|
||||
return kvm_type;
|
||||
}
|
||||
|
||||
+bool kvm_enable_hypercall(uint64_t enable_mask)
|
||||
+{
|
||||
+ KVMState *s = KVM_STATE(current_accel());
|
||||
+
|
||||
+ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask);
|
||||
+}
|
||||
+
|
||||
bool kvm_has_smm(void)
|
||||
{
|
||||
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
|
||||
@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void)
|
||||
return ecx & CPUID_EXT_VMX;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
|
||||
+ * to service guest-initiated memory attribute update requests so that
|
||||
+ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
|
||||
+ * backed by the private memory pool provided by guest_memfd, and as such
|
||||
+ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
|
||||
+ *
|
||||
+ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live
|
||||
+ * migration, are not implemented here currently.
|
||||
+ *
|
||||
+ * For the guest_memfd use-case, these exits will generally be synthesized
|
||||
+ * by KVM based on platform-specific hypercalls, like GHCB requests in the
|
||||
+ * case of SEV-SNP, and not issued directly within the guest though the
|
||||
+ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
|
||||
+ * not actually advertised to guests via the KVM CPUID feature bit, as
|
||||
+ * opposed to SEV live migration where it would be. Since it is unlikely the
|
||||
+ * SEV live migration use-case would be useful for guest-memfd backed guests,
|
||||
+ * because private/shared page tracking is already provided through other
|
||||
+ * means, these 2 use-cases should be treated as being mutually-exclusive.
|
||||
+ */
|
||||
+static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
|
||||
+{
|
||||
+ uint64_t gpa, size, attributes;
|
||||
+
|
||||
+ if (!machine_require_guest_memfd(current_machine))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ gpa = run->hypercall.args[0];
|
||||
+ size = run->hypercall.args[1] * TARGET_PAGE_SIZE;
|
||||
+ attributes = run->hypercall.args[2];
|
||||
+
|
||||
+ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
|
||||
+
|
||||
+ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
|
||||
+}
|
||||
+
|
||||
+static int kvm_handle_hypercall(struct kvm_run *run)
|
||||
+{
|
||||
+ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
|
||||
+ return kvm_handle_hc_map_gpa_range(run);
|
||||
+
|
||||
+ return -EINVAL;
|
||||
+}
|
||||
+
|
||||
#define VMX_INVALID_GUEST_STATE 0x80000021
|
||||
|
||||
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
ret = kvm_xen_handle_exit(cpu, &run->xen);
|
||||
break;
|
||||
#endif
|
||||
+ case KVM_EXIT_HYPERCALL:
|
||||
+ ret = kvm_handle_hypercall(run);
|
||||
+ break;
|
||||
default:
|
||||
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
|
||||
ret = -1;
|
||||
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
|
||||
index 6b44844d95..34fc60774b 100644
|
||||
--- a/target/i386/kvm/kvm_i386.h
|
||||
+++ b/target/i386/kvm/kvm_i386.h
|
||||
@@ -33,6 +33,7 @@
|
||||
bool kvm_has_smm(void);
|
||||
bool kvm_enable_x2apic(void);
|
||||
bool kvm_hv_vpindex_settable(void);
|
||||
+bool kvm_enable_hypercall(uint64_t enable_mask);
|
||||
|
||||
bool kvm_enable_sgx_provisioning(KVMState *s);
|
||||
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
|
||||
diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events
|
||||
index b365a8e8e2..74a6234ff7 100644
|
||||
--- a/target/i386/kvm/trace-events
|
||||
+++ b/target/i386/kvm/trace-events
|
||||
@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %"
|
||||
kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d"
|
||||
kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d"
|
||||
kvm_x86_update_msi_routes(int num) "Updated %d MSI routes"
|
||||
+kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64
|
||||
|
||||
# xen-emu.c
|
||||
kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,536 @@
|
||||
From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001
|
||||
From: Sean Christopherson <sean.j.christopherson@intel.com>
|
||||
Date: Thu, 29 Feb 2024 01:36:43 -0500
|
||||
Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to
|
||||
separate helper
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Move the architectural (for lack of a better term) CPUID leaf generation
|
||||
to a separate helper so that the generation code can be reused by TDX,
|
||||
which needs to generate a canonical VM-scoped configuration.
|
||||
|
||||
For now this is just a cleanup, so keep the function static.
|
||||
|
||||
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
|
||||
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com>
|
||||
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 417 +++++++++++++++++++++---------------------
|
||||
1 file changed, 211 insertions(+), 206 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 739f33db47..5f30b649a0 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env)
|
||||
}
|
||||
}
|
||||
|
||||
-int kvm_arch_init_vcpu(CPUState *cs)
|
||||
+static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
|
||||
+ struct kvm_cpuid_entry2 *entries,
|
||||
+ uint32_t cpuid_i)
|
||||
{
|
||||
- struct {
|
||||
- struct kvm_cpuid2 cpuid;
|
||||
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
- } cpuid_data;
|
||||
- /*
|
||||
- * The kernel defines these structs with padding fields so there
|
||||
- * should be no extra padding in our cpuid_data struct.
|
||||
- */
|
||||
- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
|
||||
- sizeof(struct kvm_cpuid2) +
|
||||
- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
|
||||
-
|
||||
- X86CPU *cpu = X86_CPU(cs);
|
||||
- CPUX86State *env = &cpu->env;
|
||||
- uint32_t limit, i, j, cpuid_i;
|
||||
+ uint32_t limit, i, j;
|
||||
uint32_t unused;
|
||||
struct kvm_cpuid_entry2 *c;
|
||||
- uint32_t signature[3];
|
||||
- int kvm_base = KVM_CPUID_SIGNATURE;
|
||||
- int max_nested_state_len;
|
||||
- int r;
|
||||
- Error *local_err = NULL;
|
||||
-
|
||||
- memset(&cpuid_data, 0, sizeof(cpuid_data));
|
||||
-
|
||||
- cpuid_i = 0;
|
||||
-
|
||||
- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
|
||||
-
|
||||
- r = kvm_arch_set_tsc_khz(cs);
|
||||
- if (r < 0) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- /* vcpu's TSC frequency is either specified by user, or following
|
||||
- * the value used by KVM if the former is not present. In the
|
||||
- * latter case, we query it from KVM and record in env->tsc_khz,
|
||||
- * so that vcpu's TSC frequency can be migrated later via this field.
|
||||
- */
|
||||
- if (!env->tsc_khz) {
|
||||
- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
|
||||
- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
|
||||
- -ENOTSUP;
|
||||
- if (r > 0) {
|
||||
- env->tsc_khz = r;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
|
||||
-
|
||||
- /*
|
||||
- * kvm_hyperv_expand_features() is called here for the second time in case
|
||||
- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
|
||||
- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
|
||||
- * check which Hyper-V enlightenments are supported and which are not, we
|
||||
- * can still proceed and check/expand Hyper-V enlightenments here so legacy
|
||||
- * behavior is preserved.
|
||||
- */
|
||||
- if (!kvm_hyperv_expand_features(cpu, &local_err)) {
|
||||
- error_report_err(local_err);
|
||||
- return -ENOSYS;
|
||||
- }
|
||||
-
|
||||
- if (hyperv_enabled(cpu)) {
|
||||
- r = hyperv_init_vcpu(cpu);
|
||||
- if (r) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
|
||||
- kvm_base = KVM_CPUID_SIGNATURE_NEXT;
|
||||
- has_msr_hv_hypercall = true;
|
||||
- }
|
||||
-
|
||||
- if (cs->kvm_state->xen_version) {
|
||||
-#ifdef CONFIG_XEN_EMU
|
||||
- struct kvm_cpuid_entry2 *xen_max_leaf;
|
||||
-
|
||||
- memcpy(signature, "XenVMMXenVMM", 12);
|
||||
-
|
||||
- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_SIGNATURE;
|
||||
- c->eax = kvm_base + XEN_CPUID_TIME;
|
||||
- c->ebx = signature[0];
|
||||
- c->ecx = signature[1];
|
||||
- c->edx = signature[2];
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_VENDOR;
|
||||
- c->eax = cs->kvm_state->xen_version;
|
||||
- c->ebx = 0;
|
||||
- c->ecx = 0;
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_HVM_MSR;
|
||||
- /* Number of hypercall-transfer pages */
|
||||
- c->eax = 1;
|
||||
- /* Hypercall MSR base address */
|
||||
- if (hyperv_enabled(cpu)) {
|
||||
- c->ebx = XEN_HYPERCALL_MSR_HYPERV;
|
||||
- kvm_xen_init(cs->kvm_state, c->ebx);
|
||||
- } else {
|
||||
- c->ebx = XEN_HYPERCALL_MSR;
|
||||
- }
|
||||
- c->ecx = 0;
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_TIME;
|
||||
- c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
|
||||
- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
|
||||
- /* default=0 (emulate if necessary) */
|
||||
- c->ebx = 0;
|
||||
- /* guest tsc frequency */
|
||||
- c->ecx = env->user_tsc_khz;
|
||||
- /* guest tsc incarnation (migration count) */
|
||||
- c->edx = 0;
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = kvm_base + XEN_CPUID_HVM;
|
||||
- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
|
||||
- c->function = kvm_base + XEN_CPUID_HVM;
|
||||
-
|
||||
- if (cpu->xen_vapic) {
|
||||
- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
|
||||
- c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
|
||||
- }
|
||||
-
|
||||
- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
|
||||
-
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
|
||||
- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
|
||||
- c->ebx = cs->cpu_index;
|
||||
- }
|
||||
-
|
||||
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
|
||||
- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- r = kvm_xen_init_vcpu(cs);
|
||||
- if (r) {
|
||||
- return r;
|
||||
- }
|
||||
-
|
||||
- kvm_base += 0x100;
|
||||
-#else /* CONFIG_XEN_EMU */
|
||||
- /* This should never happen as kvm_arch_init() would have died first. */
|
||||
- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
|
||||
- abort();
|
||||
-#endif
|
||||
- } else if (cpu->expose_kvm) {
|
||||
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = KVM_CPUID_SIGNATURE | kvm_base;
|
||||
- c->eax = KVM_CPUID_FEATURES | kvm_base;
|
||||
- c->ebx = signature[0];
|
||||
- c->ecx = signature[1];
|
||||
- c->edx = signature[2];
|
||||
-
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
- c->function = KVM_CPUID_FEATURES | kvm_base;
|
||||
- c->eax = env->features[FEAT_KVM];
|
||||
- c->edx = env->features[FEAT_KVM_HINTS];
|
||||
- }
|
||||
|
||||
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
- if (cpu->kvm_pv_enforce_cpuid) {
|
||||
- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
|
||||
- if (r < 0) {
|
||||
- fprintf(stderr,
|
||||
- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
|
||||
- strerror(-r));
|
||||
- abort();
|
||||
- }
|
||||
- }
|
||||
-
|
||||
for (i = 0; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported level value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
-
|
||||
+ c = &entries[cpuid_i++];
|
||||
switch (i) {
|
||||
case 2: {
|
||||
/* Keep reading function 2 till all the input is received */
|
||||
@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
for (j = 1; j < times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
continue;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x12:
|
||||
@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
}
|
||||
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x12,ecx:0x%x)\n", j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
for (j = 1; j <= times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->index = j;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0x80000000; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 0x8000001d:
|
||||
@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
break;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "cpuid_data is full, no space for "
|
||||
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0xC0000000; i <= limit; i++) {
|
||||
+ j = 0;
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
|
||||
- abort();
|
||||
+ goto full;
|
||||
}
|
||||
- c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c = &entries[cpuid_i++];
|
||||
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
}
|
||||
}
|
||||
|
||||
+ return cpuid_i;
|
||||
+
|
||||
+full:
|
||||
+ fprintf(stderr, "cpuid_data is full, no space for "
|
||||
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
+ abort();
|
||||
+}
|
||||
+
|
||||
+int kvm_arch_init_vcpu(CPUState *cs)
|
||||
+{
|
||||
+ struct {
|
||||
+ struct kvm_cpuid2 cpuid;
|
||||
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
+ } cpuid_data;
|
||||
+ /*
|
||||
+ * The kernel defines these structs with padding fields so there
|
||||
+ * should be no extra padding in our cpuid_data struct.
|
||||
+ */
|
||||
+ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
|
||||
+ sizeof(struct kvm_cpuid2) +
|
||||
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
|
||||
+
|
||||
+ X86CPU *cpu = X86_CPU(cs);
|
||||
+ CPUX86State *env = &cpu->env;
|
||||
+ uint32_t cpuid_i;
|
||||
+ struct kvm_cpuid_entry2 *c;
|
||||
+ uint32_t signature[3];
|
||||
+ int kvm_base = KVM_CPUID_SIGNATURE;
|
||||
+ int max_nested_state_len;
|
||||
+ int r;
|
||||
+ Error *local_err = NULL;
|
||||
+
|
||||
+ memset(&cpuid_data, 0, sizeof(cpuid_data));
|
||||
+
|
||||
+ cpuid_i = 0;
|
||||
+
|
||||
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
|
||||
+
|
||||
+ r = kvm_arch_set_tsc_khz(cs);
|
||||
+ if (r < 0) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ /* vcpu's TSC frequency is either specified by user, or following
|
||||
+ * the value used by KVM if the former is not present. In the
|
||||
+ * latter case, we query it from KVM and record in env->tsc_khz,
|
||||
+ * so that vcpu's TSC frequency can be migrated later via this field.
|
||||
+ */
|
||||
+ if (!env->tsc_khz) {
|
||||
+ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
|
||||
+ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
|
||||
+ -ENOTSUP;
|
||||
+ if (r > 0) {
|
||||
+ env->tsc_khz = r;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
|
||||
+
|
||||
+ /*
|
||||
+ * kvm_hyperv_expand_features() is called here for the second time in case
|
||||
+ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
|
||||
+ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
|
||||
+ * check which Hyper-V enlightenments are supported and which are not, we
|
||||
+ * can still proceed and check/expand Hyper-V enlightenments here so legacy
|
||||
+ * behavior is preserved.
|
||||
+ */
|
||||
+ if (!kvm_hyperv_expand_features(cpu, &local_err)) {
|
||||
+ error_report_err(local_err);
|
||||
+ return -ENOSYS;
|
||||
+ }
|
||||
+
|
||||
+ if (hyperv_enabled(cpu)) {
|
||||
+ r = hyperv_init_vcpu(cpu);
|
||||
+ if (r) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
|
||||
+ kvm_base = KVM_CPUID_SIGNATURE_NEXT;
|
||||
+ has_msr_hv_hypercall = true;
|
||||
+ }
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version) {
|
||||
+#ifdef CONFIG_XEN_EMU
|
||||
+ struct kvm_cpuid_entry2 *xen_max_leaf;
|
||||
+
|
||||
+ memcpy(signature, "XenVMMXenVMM", 12);
|
||||
+
|
||||
+ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_SIGNATURE;
|
||||
+ c->eax = kvm_base + XEN_CPUID_TIME;
|
||||
+ c->ebx = signature[0];
|
||||
+ c->ecx = signature[1];
|
||||
+ c->edx = signature[2];
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_VENDOR;
|
||||
+ c->eax = cs->kvm_state->xen_version;
|
||||
+ c->ebx = 0;
|
||||
+ c->ecx = 0;
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM_MSR;
|
||||
+ /* Number of hypercall-transfer pages */
|
||||
+ c->eax = 1;
|
||||
+ /* Hypercall MSR base address */
|
||||
+ if (hyperv_enabled(cpu)) {
|
||||
+ c->ebx = XEN_HYPERCALL_MSR_HYPERV;
|
||||
+ kvm_xen_init(cs->kvm_state, c->ebx);
|
||||
+ } else {
|
||||
+ c->ebx = XEN_HYPERCALL_MSR;
|
||||
+ }
|
||||
+ c->ecx = 0;
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_TIME;
|
||||
+ c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
|
||||
+ (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
|
||||
+ /* default=0 (emulate if necessary) */
|
||||
+ c->ebx = 0;
|
||||
+ /* guest tsc frequency */
|
||||
+ c->ecx = env->user_tsc_khz;
|
||||
+ /* guest tsc incarnation (migration count) */
|
||||
+ c->edx = 0;
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM;
|
||||
+ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
|
||||
+ c->function = kvm_base + XEN_CPUID_HVM;
|
||||
+
|
||||
+ if (cpu->xen_vapic) {
|
||||
+ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
|
||||
+ c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
|
||||
+ }
|
||||
+
|
||||
+ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
|
||||
+ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
|
||||
+ c->ebx = cs->cpu_index;
|
||||
+ }
|
||||
+
|
||||
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
|
||||
+ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ r = kvm_xen_init_vcpu(cs);
|
||||
+ if (r) {
|
||||
+ return r;
|
||||
+ }
|
||||
+
|
||||
+ kvm_base += 0x100;
|
||||
+#else /* CONFIG_XEN_EMU */
|
||||
+ /* This should never happen as kvm_arch_init() would have died first. */
|
||||
+ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
|
||||
+ abort();
|
||||
+#endif
|
||||
+ } else if (cpu->expose_kvm) {
|
||||
+ memcpy(signature, "KVMKVMKVM\0\0\0", 12);
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = KVM_CPUID_SIGNATURE | kvm_base;
|
||||
+ c->eax = KVM_CPUID_FEATURES | kvm_base;
|
||||
+ c->ebx = signature[0];
|
||||
+ c->ecx = signature[1];
|
||||
+ c->edx = signature[2];
|
||||
+
|
||||
+ c = &cpuid_data.entries[cpuid_i++];
|
||||
+ c->function = KVM_CPUID_FEATURES | kvm_base;
|
||||
+ c->eax = env->features[FEAT_KVM];
|
||||
+ c->edx = env->features[FEAT_KVM_HINTS];
|
||||
+ }
|
||||
+
|
||||
+ if (cpu->kvm_pv_enforce_cpuid) {
|
||||
+ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
|
||||
+ if (r < 0) {
|
||||
+ fprintf(stderr,
|
||||
+ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
|
||||
+ strerror(-r));
|
||||
+ abort();
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i);
|
||||
cpuid_data.cpuid.nent = cpuid_i;
|
||||
|
||||
if (((env->cpuid_version >> 8)&0xF) >= 6
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,91 @@
|
||||
From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu, 9 May 2024 19:00:40 +0200
|
||||
Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm)
|
||||
|
||||
There is no need to override these fields of MachineClass because they are
|
||||
already set to the right value in the superclass.
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||
Message-ID: <20240509170044.190795-10-pbonzini@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
hw/i386/pc.c | 3 ---
|
||||
hw/i386/x86.c | 6 +++---
|
||||
include/hw/i386/x86.h | 4 ----
|
||||
3 files changed, 3 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 660a59c63b..0aca0cc79e 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->async_pf_vmexit_disable = false;
|
||||
mc->get_hotplug_handler = pc_get_hotplug_handler;
|
||||
mc->hotplug_allowed = pc_hotplug_allowed;
|
||||
- mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
|
||||
- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
|
||||
- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
|
||||
mc->auto_enable_numa_with_memhp = true;
|
||||
mc->auto_enable_numa_with_memdev = true;
|
||||
mc->has_hotpluggable_cpus = true;
|
||||
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
|
||||
index c61f4ebfa6..fcef652c1e 100644
|
||||
--- a/hw/i386/x86.c
|
||||
+++ b/hw/i386/x86.c
|
||||
@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
|
||||
numa_cpu_pre_plug(cpu_slot, dev, errp);
|
||||
}
|
||||
|
||||
-CpuInstanceProperties
|
||||
+static CpuInstanceProperties
|
||||
x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||
{
|
||||
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||
@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||
return possible_cpus->cpus[cpu_index].props;
|
||||
}
|
||||
|
||||
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
+static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
{
|
||||
X86CPUTopoIDs topo_ids;
|
||||
X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
|
||||
return topo_ids.pkg_id % ms->numa_state->num_nodes;
|
||||
}
|
||||
|
||||
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
|
||||
+static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
unsigned int max_cpus = ms->smp.max_cpus;
|
||||
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
|
||||
index d7b7d3f3ce..c2062db13f 100644
|
||||
--- a/include/hw/i386/x86.h
|
||||
+++ b/include/hw/i386/x86.h
|
||||
@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
|
||||
|
||||
void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
|
||||
void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
|
||||
-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
|
||||
- unsigned cpu_index);
|
||||
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
|
||||
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
|
||||
CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
|
||||
void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
|
||||
void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,116 @@
|
||||
From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri, 31 May 2024 12:44:44 +0200
|
||||
Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type
|
||||
for SNP guests
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM,
|
||||
or KVM_X86_SEV_ES_VM depending on the configuration and what
|
||||
the host kernel supports. SNP guests on the other hand can only
|
||||
ever use KVM_X86_SNP_VM, so split determination of VM type out
|
||||
into a separate class method that can be set accordingly for
|
||||
sev-guest vs. sev-snp-guest objects and add handling for SNP.
|
||||
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com>
|
||||
[Remove unnecessary function pointer declaration. - Paolo]
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 1 +
|
||||
target/i386/sev.c | 15 ++++++++++++---
|
||||
2 files changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 408568d053..75e75d9772 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -166,6 +166,7 @@ static const char *vm_type_name[] = {
|
||||
[KVM_X86_DEFAULT_VM] = "default",
|
||||
[KVM_X86_SEV_VM] = "SEV",
|
||||
[KVM_X86_SEV_ES_VM] = "SEV-ES",
|
||||
+ [KVM_X86_SNP_VM] = "SEV-SNP",
|
||||
};
|
||||
|
||||
bool kvm_is_vm_type_supported(int type)
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index c3daaf1ad5..072cc4f853 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -885,6 +885,11 @@ out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
|
||||
+static int sev_snp_kvm_type(X86ConfidentialGuest *cg)
|
||||
+{
|
||||
+ return KVM_X86_SNP_VM;
|
||||
+}
|
||||
+
|
||||
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
char *devname;
|
||||
@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
struct sev_user_data_status status = {};
|
||||
SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
+ X86ConfidentialGuestClass *x86_klass =
|
||||
+ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs);
|
||||
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
|
||||
@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
trace_kvm_sev_init();
|
||||
- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
+ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
|
||||
@@ -1441,10 +1448,8 @@ static void
|
||||
sev_common_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->kvm_init = sev_common_kvm_init;
|
||||
- x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
sev_common_get_sev_device,
|
||||
@@ -1529,10 +1534,12 @@ static void
|
||||
sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
klass->kvm_init = sev_kvm_init;
|
||||
+ x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
@@ -1770,8 +1777,10 @@ static void
|
||||
sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
+ x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,84 @@
|
||||
From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:20 -0500
|
||||
Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Add a simple helper to check if the current guest type is SNP. Also have
|
||||
SNP-enabled imply that SEV-ES is enabled as well, and fix up any places
|
||||
where the sev_es_enabled() check is expecting a pure/non-SNP guest.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 13 ++++++++++++-
|
||||
target/i386/sev.h | 2 ++
|
||||
2 files changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index a81b3228d4..4edfedc139 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -325,12 +325,21 @@ sev_enabled(void)
|
||||
return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON);
|
||||
}
|
||||
|
||||
+bool
|
||||
+sev_snp_enabled(void)
|
||||
+{
|
||||
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
+
|
||||
+ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST);
|
||||
+}
|
||||
+
|
||||
bool
|
||||
sev_es_enabled(void)
|
||||
{
|
||||
ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
|
||||
- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
|
||||
+ return sev_snp_enabled() ||
|
||||
+ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
"support", __func__);
|
||||
goto err;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ if (sev_es_enabled() && !sev_snp_enabled()) {
|
||||
if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
|
||||
error_setg(errp, "%s: guest policy requires SEV-ES, but "
|
||||
"host SEV-ES support unavailable",
|
||||
diff --git a/target/i386/sev.h b/target/i386/sev.h
|
||||
index bedc667eeb..94295ee74f 100644
|
||||
--- a/target/i386/sev.h
|
||||
+++ b/target/i386/sev.h
|
||||
@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext {
|
||||
#ifdef CONFIG_SEV
|
||||
bool sev_enabled(void);
|
||||
bool sev_es_enabled(void);
|
||||
+bool sev_snp_enabled(void);
|
||||
#else
|
||||
#define sev_enabled() 0
|
||||
#define sev_es_enabled() 0
|
||||
+#define sev_snp_enabled() 0
|
||||
#endif
|
||||
|
||||
uint32_t sev_get_cbit_position(void);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,187 @@
|
||||
From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:28 -0500
|
||||
Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest
|
||||
launch data
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Process any queued up launch data and encrypt/measure it into the SNP
|
||||
guest instance prior to initial guest launch.
|
||||
|
||||
This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial
|
||||
update responses.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Co-developed-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++-
|
||||
target/i386/trace-events | 2 +
|
||||
2 files changed, 113 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index e89b87d2f5..ef2e592ca7 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -756,6 +756,76 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static const char *
|
||||
+snp_page_type_to_str(int type)
|
||||
+{
|
||||
+ switch (type) {
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets";
|
||||
+ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid";
|
||||
+ default: return "unknown";
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
+ SevLaunchUpdateData *data)
|
||||
+{
|
||||
+ int ret, fw_error;
|
||||
+ struct kvm_sev_snp_launch_update update = {0};
|
||||
+
|
||||
+ if (!data->hva || !data->len) {
|
||||
+ error_report("SNP_LAUNCH_UPDATE called with invalid address"
|
||||
+ "/ length: %p / %lx",
|
||||
+ data->hva, data->len);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ update.uaddr = (__u64)(unsigned long)data->hva;
|
||||
+ update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
|
||||
+ update.len = data->len;
|
||||
+ update.type = data->type;
|
||||
+
|
||||
+ /*
|
||||
+ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private
|
||||
+ * memory attribute set in advance.
|
||||
+ */
|
||||
+ ret = kvm_set_memory_attributes_private(data->gpa, data->len);
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: failed to configure initial"
|
||||
+ "private guest memory");
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ while (update.len || ret == -EAGAIN) {
|
||||
+ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start <<
|
||||
+ TARGET_PAGE_BITS, update.len,
|
||||
+ snp_page_type_to_str(update.type));
|
||||
+
|
||||
+ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd,
|
||||
+ KVM_SEV_SNP_LAUNCH_UPDATE,
|
||||
+ &update, &fw_error);
|
||||
+ if (ret && ret != -EAGAIN) {
|
||||
+ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
|
||||
+ ret, fw_error, fw_error_to_str(fw_error));
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) {
|
||||
+ error_report("SEV-SNP: expected update of GPA range %lx-%lx,"
|
||||
+ "got GPA range %lx-%llx",
|
||||
+ data->gpa, data->gpa + data->len, data->gpa,
|
||||
+ update.gfn_start << TARGET_PAGE_BITS);
|
||||
+ ret = -EIO;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
|
||||
{
|
||||
@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
|
||||
}
|
||||
|
||||
+static void
|
||||
+sev_snp_launch_finish(SevCommonState *sev_common)
|
||||
+{
|
||||
+ int ret, error;
|
||||
+ Error *local_err = NULL;
|
||||
+ SevLaunchUpdateData *data;
|
||||
+ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
|
||||
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
|
||||
+
|
||||
+ QTAILQ_FOREACH(data, &launch_update, next) {
|
||||
+ ret = sev_snp_launch_update(sev_snp, data);
|
||||
+ if (ret) {
|
||||
+ exit(1);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth,
|
||||
+ sev_snp->host_data);
|
||||
+ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH,
|
||||
+ finish, &error);
|
||||
+ if (ret) {
|
||||
+ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'",
|
||||
+ ret, error, fw_error_to_str(error));
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ sev_set_guest_state(sev_common, SEV_STATE_RUNNING);
|
||||
+
|
||||
+ /* add migration blocker */
|
||||
+ error_setg(&sev_mig_blocker,
|
||||
+ "SEV-SNP: Migration is not implemented");
|
||||
+ ret = migrate_add_blocker(&sev_mig_blocker, &local_err);
|
||||
+ if (local_err) {
|
||||
+ error_report_err(local_err);
|
||||
+ error_free(sev_mig_blocker);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
static void
|
||||
sev_vm_state_change(void *opaque, bool running, RunState state)
|
||||
{
|
||||
@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
klass->launch_start = sev_snp_launch_start;
|
||||
+ klass->launch_finish = sev_snp_launch_finish;
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
-
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
diff --git a/target/i386/trace-events b/target/i386/trace-events
|
||||
index cb26d8a925..06b44ead2e 100644
|
||||
--- a/target/i386/trace-events
|
||||
+++ b/target/i386/trace-events
|
||||
@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) ""
|
||||
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
|
||||
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
|
||||
kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
|
||||
+kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)"
|
||||
+kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Apr 2024 18:07:41 -0500
|
||||
Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV
|
||||
guest objects
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
QEMU will currently automatically make use of the KVM_SEV_INIT2 API for
|
||||
initializing SEV and SEV-ES guests verses the older
|
||||
KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces.
|
||||
|
||||
However, the older interfaces will silently avoid sync'ing FPU/XSAVE
|
||||
state to the VMSA prior to encryption, thus relying on behavior and
|
||||
measurements that assume the related fields to be allow zero.
|
||||
|
||||
With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in
|
||||
measurements changes and, theoretically, behaviorial changes, though the
|
||||
latter are unlikely to be seen in practice.
|
||||
|
||||
To allow a smooth transition to the newer interface, while still
|
||||
providing a mechanism to maintain backward compatibility with VMs
|
||||
created using the older interfaces, provide a new command-line
|
||||
parameter:
|
||||
|
||||
-object sev-guest,legacy-vm-type=true,...
|
||||
|
||||
and have it default to false.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Message-ID: <20240409230743.962513-2-michael.roth@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
qapi/qom.json | 11 ++++++++++-
|
||||
target/i386/sev.c | 18 +++++++++++++++++-
|
||||
2 files changed, 27 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index 85e6b4f84a..38dde6d785 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -898,6 +898,14 @@
|
||||
# designated guest firmware page for measured boot with -kernel
|
||||
# (default: false) (since 6.2)
|
||||
#
|
||||
+# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
|
||||
+# The newer KVM_SEV_INIT2 interface syncs additional vCPU
|
||||
+# state when initializing the VMSA structures, which will
|
||||
+# result in a different guest measurement. Set this to
|
||||
+# maintain compatibility with older QEMU or kernel versions
|
||||
+# that rely on legacy KVM_SEV_INIT behavior.
|
||||
+# (default: false) (since 9.1)
|
||||
+#
|
||||
# Since: 2.12
|
||||
##
|
||||
{ 'struct': 'SevGuestProperties',
|
||||
@@ -908,7 +916,8 @@
|
||||
'*handle': 'uint32',
|
||||
'*cbitpos': 'uint32',
|
||||
'reduced-phys-bits': 'uint32',
|
||||
- '*kernel-hashes': 'bool' } }
|
||||
+ '*kernel-hashes': 'bool',
|
||||
+ '*legacy-vm-type': 'bool' } }
|
||||
|
||||
##
|
||||
# @ThreadContextProperties:
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 9dab4060b8..f4ee317cb0 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -67,6 +67,7 @@ struct SevGuestState {
|
||||
uint32_t cbitpos;
|
||||
uint32_t reduced_phys_bits;
|
||||
bool kernel_hashes;
|
||||
+ bool legacy_vm_type;
|
||||
|
||||
/* runtime state */
|
||||
uint32_t handle;
|
||||
@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
|
||||
sev->kernel_hashes = value;
|
||||
}
|
||||
|
||||
+static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
|
||||
+{
|
||||
+ return SEV_GUEST(obj)->legacy_vm_type;
|
||||
+}
|
||||
+
|
||||
+static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
|
||||
+{
|
||||
+ SEV_GUEST(obj)->legacy_vm_type = value;
|
||||
+}
|
||||
+
|
||||
bool
|
||||
sev_enabled(void)
|
||||
{
|
||||
@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
}
|
||||
|
||||
kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
|
||||
- if (kvm_is_vm_type_supported(kvm_type)) {
|
||||
+ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) {
|
||||
sev->kvm_type = kvm_type;
|
||||
} else {
|
||||
sev->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
sev_guest_set_kernel_hashes);
|
||||
object_class_property_set_description(oc, "kernel-hashes",
|
||||
"add kernel hashes to guest firmware for measured Linux boot");
|
||||
+ object_class_property_add_bool(oc, "legacy-vm-type",
|
||||
+ sev_guest_get_legacy_vm_type,
|
||||
+ sev_guest_set_legacy_vm_type);
|
||||
+ object_class_property_set_description(oc, "legacy-vm-type",
|
||||
+ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
|
||||
}
|
||||
|
||||
static void
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,203 @@
|
||||
From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001
|
||||
From: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:21 -0500
|
||||
Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Some aspects of the init routine SEV are specific to SEV and not
|
||||
applicable for SNP guests, so move the SEV-specific bits into
|
||||
separate class method and retain only the common functionality.
|
||||
|
||||
Co-developed-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 51 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 4edfedc139..5519de1c6b 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -73,6 +73,7 @@ struct SevCommonStateClass {
|
||||
/* public */
|
||||
int (*launch_start)(SevCommonState *sev_common);
|
||||
void (*launch_finish)(SevCommonState *sev_common);
|
||||
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -882,7 +883,7 @@ out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
|
||||
-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
char *devname;
|
||||
@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
struct sev_user_data_status status = {};
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
|
||||
- ret = ram_block_discard_disable(true);
|
||||
- if (ret) {
|
||||
- error_report("%s: cannot disable RAM discard", __func__);
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (host_cbitpos != sev_common->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev_common->cbitpos);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
" it should be in the range of 1 to 63, requested '%d'",
|
||||
__func__, sev_common->reduced_phys_bits);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL);
|
||||
@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: Failed to open %s '%s'", __func__,
|
||||
devname, strerror(errno));
|
||||
g_free(devname);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
g_free(devname);
|
||||
|
||||
@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: failed to get platform status ret=%d "
|
||||
"fw_error='%d: %s'", __func__, ret, fw_error,
|
||||
fw_error_to_str(fw_error));
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
sev_common->build_id = status.build;
|
||||
sev_common->api_major = status.api_major;
|
||||
@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (!kvm_kernel_irqchip_allowed()) {
|
||||
error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip"
|
||||
"support", __func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
error_setg(errp, "%s: guest policy requires SEV-ES, but "
|
||||
"host SEV-ES support unavailable",
|
||||
__func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
|
||||
__func__, ret, fw_error, fw_error_to_str(fw_error));
|
||||
- goto err;
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
ret = klass->launch_start(sev_common);
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to create encryption context", __func__);
|
||||
- goto err;
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (klass->kvm_init && klass->kvm_init(cgs, errp)) {
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
- ram_block_notifier_add(&sev_ram_notifier);
|
||||
- qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
|
||||
qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common);
|
||||
|
||||
cgs->ready = true;
|
||||
|
||||
return 0;
|
||||
-err:
|
||||
- ram_block_discard_disable(false);
|
||||
- return -1;
|
||||
+}
|
||||
+
|
||||
+static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ /*
|
||||
+ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding
|
||||
+ * isn't actually possible. With SNP, only guest_memfd pages are used
|
||||
+ * for private guest memory, so discarding of shared memory is still
|
||||
+ * possible..
|
||||
+ */
|
||||
+ ret = ram_block_discard_disable(true);
|
||||
+ if (ret) {
|
||||
+ error_setg(errp, "%s: cannot disable RAM discard", __func__);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * SEV uses these notifiers to register/pin pages prior to guest use,
|
||||
+ * but SNP relies on guest_memfd for private pages, which has its
|
||||
+ * own internal mechanisms for registering/pinning private memory.
|
||||
+ */
|
||||
+ ram_block_notifier_add(&sev_ram_notifier);
|
||||
+
|
||||
+ /*
|
||||
+ * The machine done notify event is used for SEV guests to get the
|
||||
+ * measurement of the encrypted images. When SEV-SNP is enabled, the
|
||||
+ * measurement is part of the guest attestation process where it can
|
||||
+ * be collected without any reliance on the VMM. So skip registering
|
||||
+ * the notifier for SNP in favor of using guest attestation instead.
|
||||
+ */
|
||||
+ qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data)
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
- klass->kvm_init = sev_kvm_init;
|
||||
+ klass->kvm_init = sev_common_kvm_init;
|
||||
x86_klass->kvm_type = sev_kvm_type;
|
||||
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
+ klass->kvm_init = sev_kvm_init;
|
||||
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,94 @@
|
||||
From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001
|
||||
From: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:22 -0500
|
||||
Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SNP does not support SMM and requires guest_memfd for
|
||||
private guest memory, so add SNP specific kvm_init()
|
||||
functionality in snp_kvm_init() class method.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 5519de1c6b..6525b3c1a0 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -885,12 +885,12 @@ out:
|
||||
|
||||
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
- SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
char *devname;
|
||||
int ret, fw_error, cmd;
|
||||
uint32_t ebx;
|
||||
uint32_t host_cbitpos;
|
||||
struct sev_user_data_status status = {};
|
||||
+ SevCommonState *sev_common = SEV_COMMON(cgs);
|
||||
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
|
||||
|
||||
sev_common->state = SEV_STATE_UNINIT;
|
||||
@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
+
|
||||
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
|
||||
+ x86ms->smm = ON_OFF_AUTO_OFF;
|
||||
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
|
||||
+ error_setg(errp, "SEV-SNP does not support SMM.");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int
|
||||
sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
|
||||
{
|
||||
@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
|
||||
static void
|
||||
sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
+ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
+
|
||||
+ klass->kvm_init = sev_snp_kvm_init;
|
||||
+
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
static void
|
||||
sev_snp_guest_instance_init(Object *obj)
|
||||
{
|
||||
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
|
||||
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
|
||||
|
||||
+ cgs->require_guest_memfd = true;
|
||||
+
|
||||
/* default init/start/finish params for kvm */
|
||||
sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,262 @@
|
||||
From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:32 -0500
|
||||
Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
SEV-SNP firmware allows a special guest page to be populated with a
|
||||
table of guest CPUID values so that they can be validated through
|
||||
firmware before being loaded into encrypted guest memory where they can
|
||||
be used in place of hypervisor-provided values[1].
|
||||
|
||||
As part of SEV-SNP guest initialization, use this interface to validate
|
||||
the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest
|
||||
start and populate the CPUID page reserved by OVMF with the resulting
|
||||
encrypted data.
|
||||
|
||||
[1] SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 162 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index c57534fca2..06401f0526 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = {
|
||||
|
||||
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
|
||||
|
||||
+/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
|
||||
+#define KVM_MAX_CPUID_ENTRIES 100
|
||||
+
|
||||
+typedef struct KvmCpuidInfo {
|
||||
+ struct kvm_cpuid2 cpuid;
|
||||
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
|
||||
+} KvmCpuidInfo;
|
||||
+
|
||||
+#define SNP_CPUID_FUNCTION_MAXCOUNT 64
|
||||
+#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
|
||||
+
|
||||
+typedef struct {
|
||||
+ uint32_t eax_in;
|
||||
+ uint32_t ecx_in;
|
||||
+ uint64_t xcr0_in;
|
||||
+ uint64_t xss_in;
|
||||
+ uint32_t eax;
|
||||
+ uint32_t ebx;
|
||||
+ uint32_t ecx;
|
||||
+ uint32_t edx;
|
||||
+ uint64_t reserved;
|
||||
+} __attribute__((packed)) SnpCpuidFunc;
|
||||
+
|
||||
+typedef struct {
|
||||
+ uint32_t count;
|
||||
+ uint32_t reserved1;
|
||||
+ uint64_t reserved2;
|
||||
+ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT];
|
||||
+} __attribute__((packed)) SnpCpuidInfo;
|
||||
+
|
||||
static int
|
||||
sev_ioctl(int fd, int cmd, void *data, int *error)
|
||||
{
|
||||
@@ -788,6 +818,35 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static void
|
||||
+sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
|
||||
+ SnpCpuidInfo *new)
|
||||
+{
|
||||
+ size_t i;
|
||||
+
|
||||
+ if (old->count != new->count) {
|
||||
+ error_report("SEV-SNP: CPUID validation failed due to count mismatch,"
|
||||
+ "provided: %d, expected: %d", old->count, new->count);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < old->count; i++) {
|
||||
+ SnpCpuidFunc *old_func, *new_func;
|
||||
+
|
||||
+ old_func = &old->entries[i];
|
||||
+ new_func = &new->entries[i];
|
||||
+
|
||||
+ if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
|
||||
+ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x"
|
||||
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x"
|
||||
+ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x",
|
||||
+ old_func->eax_in, old_func->ecx_in,
|
||||
+ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx,
|
||||
+ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static const char *
|
||||
snp_page_type_to_str(int type)
|
||||
{
|
||||
@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
SevLaunchUpdateData *data)
|
||||
{
|
||||
int ret, fw_error;
|
||||
+ SnpCpuidInfo snp_cpuid_info;
|
||||
struct kvm_sev_snp_launch_update update = {0};
|
||||
|
||||
if (!data->hva || !data->len) {
|
||||
@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
return 1;
|
||||
}
|
||||
|
||||
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */
|
||||
+ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info));
|
||||
+ }
|
||||
+
|
||||
update.uaddr = (__u64)(unsigned long)data->hva;
|
||||
update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
|
||||
update.len = data->len;
|
||||
@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
|
||||
if (ret && ret != -EAGAIN) {
|
||||
error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
|
||||
ret, fw_error, fw_error_to_str(fw_error));
|
||||
+
|
||||
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva);
|
||||
+ error_report("SEV-SNP: failed update CPUID page");
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
}
|
||||
|
||||
static int
|
||||
-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
+snp_launch_update_data(uint64_t gpa, void *hva,
|
||||
+ uint32_t len, int type)
|
||||
{
|
||||
SevLaunchUpdateData *data;
|
||||
|
||||
@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
|
||||
+ const KvmCpuidInfo *kvm_cpuid_info)
|
||||
+{
|
||||
+ size_t i;
|
||||
+
|
||||
+ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) {
|
||||
+ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)",
|
||||
+ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info));
|
||||
+
|
||||
+ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) {
|
||||
+ const struct kvm_cpuid_entry2 *kvm_cpuid_entry;
|
||||
+ SnpCpuidFunc *snp_cpuid_entry;
|
||||
+
|
||||
+ kvm_cpuid_entry = &kvm_cpuid_info->entries[i];
|
||||
+ snp_cpuid_entry = &snp_cpuid_info->entries[i];
|
||||
+
|
||||
+ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function;
|
||||
+ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) {
|
||||
+ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index;
|
||||
+ }
|
||||
+ snp_cpuid_entry->eax = kvm_cpuid_entry->eax;
|
||||
+ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx;
|
||||
+ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx;
|
||||
+ snp_cpuid_entry->edx = kvm_cpuid_entry->edx;
|
||||
+
|
||||
+ /*
|
||||
+ * Guest kernels will calculate EBX themselves using the 0xD
|
||||
+ * subfunctions corresponding to the individual XSAVE areas, so only
|
||||
+ * encode the base XSAVE size in the initial leaves, corresponding
|
||||
+ * to the initial XCR0=1 state.
|
||||
+ */
|
||||
+ if (snp_cpuid_entry->eax_in == 0xD &&
|
||||
+ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) {
|
||||
+ snp_cpuid_entry->ebx = 0x240;
|
||||
+ snp_cpuid_entry->xcr0_in = 1;
|
||||
+ snp_cpuid_entry->xss_in = 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ snp_cpuid_info->count = i;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
|
||||
+{
|
||||
+ KvmCpuidInfo kvm_cpuid_info = {0};
|
||||
+ SnpCpuidInfo snp_cpuid_info;
|
||||
+ CPUState *cs = first_cpu;
|
||||
+ int ret;
|
||||
+ uint32_t i = 0;
|
||||
+
|
||||
+ assert(sizeof(snp_cpuid_info) <= cpuid_len);
|
||||
+
|
||||
+ /* get the cpuid list from KVM */
|
||||
+ do {
|
||||
+ kvm_cpuid_info.cpuid.nent = ++i;
|
||||
+ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info);
|
||||
+ } while (ret == -E2BIG);
|
||||
+
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'",
|
||||
+ strerror(-ret));
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info);
|
||||
+ if (ret) {
|
||||
+ error_report("SEV-SNP: failed to generate CPUID table information");
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info));
|
||||
+
|
||||
+ return snp_launch_update_data(cpuid_addr, hva, cpuid_len,
|
||||
+ KVM_SEV_SNP_PAGE_TYPE_CPUID);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
snp_metadata_desc_to_page_type(int desc_type)
|
||||
{
|
||||
@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
- ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
+ ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
|
||||
+ } else {
|
||||
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ }
|
||||
+
|
||||
if (ret) {
|
||||
error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
|
||||
__func__, desc->base, desc->len, desc->type);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,127 @@
|
||||
From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:31 -0500
|
||||
Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata
|
||||
pages
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
OVMF reserves various pages so they can be pre-initialized/validated
|
||||
prior to launching the guest. Add support for populating these pages
|
||||
with the expected content.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 74 insertions(+)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 17281bb2c7..c57534fca2 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common)
|
||||
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
|
||||
}
|
||||
|
||||
+static int
|
||||
+snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
|
||||
+{
|
||||
+ SevLaunchUpdateData *data;
|
||||
+
|
||||
+ data = g_new0(SevLaunchUpdateData, 1);
|
||||
+ data->gpa = gpa;
|
||||
+ data->hva = hva;
|
||||
+ data->len = len;
|
||||
+ data->type = type;
|
||||
+
|
||||
+ QTAILQ_INSERT_TAIL(&launch_update, data, next);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+snp_metadata_desc_to_page_type(int desc_type)
|
||||
+{
|
||||
+ switch (desc_type) {
|
||||
+ /* Add the umeasured prevalidated pages as a zero page */
|
||||
+ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS;
|
||||
+ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID;
|
||||
+ default:
|
||||
+ return KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
+ OvmfSevMetadata *metadata)
|
||||
+{
|
||||
+ OvmfSevMetadataDesc *desc;
|
||||
+ int type, ret, i;
|
||||
+ void *hva;
|
||||
+ MemoryRegion *mr = NULL;
|
||||
+
|
||||
+ for (i = 0; i < metadata->num_desc; i++) {
|
||||
+ desc = &metadata->descs[i];
|
||||
+
|
||||
+ type = snp_metadata_desc_to_page_type(desc->type);
|
||||
+
|
||||
+ hva = gpa2hva(&mr, desc->base, desc->len, NULL);
|
||||
+ if (!hva) {
|
||||
+ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x",
|
||||
+ __func__, desc->base, desc->len);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
|
||||
+ __func__, desc->base, desc->len, desc->type);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void
|
||||
sev_snp_launch_finish(SevCommonState *sev_common)
|
||||
{
|
||||
int ret, error;
|
||||
Error *local_err = NULL;
|
||||
+ OvmfSevMetadata *metadata;
|
||||
SevLaunchUpdateData *data;
|
||||
SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
|
||||
struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
|
||||
|
||||
+ /*
|
||||
+ * To boot the SNP guest, the hypervisor is required to populate the CPUID
|
||||
+ * and Secrets page before finalizing the launch flow. The location of
|
||||
+ * the secrets and CPUID page is available through the OVMF metadata GUID.
|
||||
+ */
|
||||
+ metadata = pc_system_get_ovmf_sev_metadata_ptr();
|
||||
+ if (metadata == NULL) {
|
||||
+ error_report("%s: Failed to locate SEV metadata header", __func__);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ /* Populate all the metadata pages */
|
||||
+ snp_populate_metadata_pages(sev_snp, metadata);
|
||||
+
|
||||
QTAILQ_FOREACH(data, &launch_update, next) {
|
||||
ret = sev_snp_launch_update(sev_snp, data);
|
||||
if (ret) {
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,122 @@
|
||||
From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001
|
||||
From: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:27 -0500
|
||||
Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
The SNP_LAUNCH_START is called first to create a cryptographic launch
|
||||
context within the firmware.
|
||||
|
||||
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++
|
||||
target/i386/trace-events | 1 +
|
||||
2 files changed, 40 insertions(+)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 43d1c48bd9..e89b87d2f5 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "confidential-guest.h"
|
||||
#include "hw/i386/pc.h"
|
||||
#include "exec/address-spaces.h"
|
||||
+#include "qemu/queue.h"
|
||||
|
||||
OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
|
||||
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
|
||||
@@ -115,6 +116,16 @@ struct SevSnpGuestState {
|
||||
#define DEFAULT_SEV_DEVICE "/dev/sev"
|
||||
#define DEFAULT_SEV_SNP_POLICY 0x30000
|
||||
|
||||
+typedef struct SevLaunchUpdateData {
|
||||
+ QTAILQ_ENTRY(SevLaunchUpdateData) next;
|
||||
+ hwaddr gpa;
|
||||
+ void *hva;
|
||||
+ uint64_t len;
|
||||
+ int type;
|
||||
+} SevLaunchUpdateData;
|
||||
+
|
||||
+static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update;
|
||||
+
|
||||
#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
|
||||
typedef struct __attribute__((__packed__)) SevInfoBlock {
|
||||
/* SEV-ES Reset Vector Address */
|
||||
@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+sev_snp_launch_start(SevCommonState *sev_common)
|
||||
+{
|
||||
+ int fw_error, rc;
|
||||
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
|
||||
+ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf;
|
||||
+
|
||||
+ trace_kvm_sev_snp_launch_start(start->policy,
|
||||
+ sev_snp_guest->guest_visible_workarounds);
|
||||
+
|
||||
+ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START,
|
||||
+ start, &fw_error);
|
||||
+ if (rc < 0) {
|
||||
+ error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'",
|
||||
+ __func__, rc, fw_error, fw_error_to_str(fw_error));
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ QTAILQ_INIT(&launch_update);
|
||||
+
|
||||
+ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
sev_launch_start(SevCommonState *sev_common)
|
||||
{
|
||||
@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
ret = klass->launch_start(sev_common);
|
||||
+
|
||||
if (ret) {
|
||||
error_setg(errp, "%s: failed to create encryption context", __func__);
|
||||
return -1;
|
||||
@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->launch_start = sev_snp_launch_start;
|
||||
klass->kvm_init = sev_snp_kvm_init;
|
||||
x86_klass->kvm_type = sev_snp_kvm_type;
|
||||
|
||||
+
|
||||
object_class_property_add(oc, "policy", "uint64",
|
||||
sev_snp_guest_get_policy,
|
||||
sev_snp_guest_set_policy, NULL, NULL);
|
||||
diff --git a/target/i386/trace-events b/target/i386/trace-events
|
||||
index 2cd8726eeb..cb26d8a925 100644
|
||||
--- a/target/i386/trace-events
|
||||
+++ b/target/i386/trace-events
|
||||
@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s"
|
||||
kvm_sev_launch_finish(void) ""
|
||||
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
|
||||
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
|
||||
+kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,237 @@
|
||||
From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001
|
||||
From: Dov Murik <dovmurik@linux.ibm.com>
|
||||
Date: Thu, 30 May 2024 06:16:35 -0500
|
||||
Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
In SNP, the hashes page designated with a specific metadata entry
|
||||
published in AmdSev OVMF.
|
||||
|
||||
Therefore, if the user enabled kernel hashes (for measured direct boot),
|
||||
QEMU should prepare the content of hashes table, and during the
|
||||
processing of the metadata entry it copy the content into the designated
|
||||
page and encrypt it.
|
||||
|
||||
Note that in SNP (unlike SEV and SEV-ES) the measurements is done in
|
||||
whole 4KB pages. Therefore QEMU zeros the whole page that includes the
|
||||
hashes table, and fills in the kernel hashes area in that page, and then
|
||||
encrypts the whole page. The rest of the page is reserved for SEV
|
||||
launch secrets which are not usable anyway on SNP.
|
||||
|
||||
If the user disabled kernel hashes, QEMU pre-validates the kernel hashes
|
||||
page as a zero page.
|
||||
|
||||
Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
include/hw/i386/pc.h | 2 +
|
||||
target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++-----------
|
||||
2 files changed, 85 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
|
||||
index 94b49310f5..ee3bfb7be9 100644
|
||||
--- a/include/hw/i386/pc.h
|
||||
+++ b/include/hw/i386/pc.h
|
||||
@@ -175,6 +175,8 @@ typedef enum {
|
||||
SEV_DESC_TYPE_SNP_SECRETS,
|
||||
/* The section contains address that can be used as a CPUID page */
|
||||
SEV_DESC_TYPE_CPUID,
|
||||
+ /* The section contains the region for kernel hashes for measured direct boot */
|
||||
+ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10,
|
||||
|
||||
} ovmf_sev_metadata_desc_type;
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 3fce4c08eb..004c667ac1 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -115,6 +115,10 @@ struct SevCommonStateClass {
|
||||
X86ConfidentialGuestClass parent_class;
|
||||
|
||||
/* public */
|
||||
+ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp);
|
||||
int (*launch_start)(SevCommonState *sev_common);
|
||||
void (*launch_finish)(SevCommonState *sev_common);
|
||||
int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len);
|
||||
@@ -154,6 +158,9 @@ struct SevSnpGuestState {
|
||||
|
||||
struct kvm_sev_snp_launch_start kvm_start_conf;
|
||||
struct kvm_sev_snp_launch_finish kvm_finish_conf;
|
||||
+
|
||||
+ uint32_t kernel_hashes_offset;
|
||||
+ PaddedSevHashTable *kernel_hashes_data;
|
||||
};
|
||||
|
||||
#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */
|
||||
@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
|
||||
KVM_SEV_SNP_PAGE_TYPE_CPUID);
|
||||
}
|
||||
|
||||
+static int
|
||||
+snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr,
|
||||
+ void *hva, uint32_t len)
|
||||
+{
|
||||
+ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO;
|
||||
+ if (sev_snp->parent_obj.kernel_hashes) {
|
||||
+ assert(sev_snp->kernel_hashes_data);
|
||||
+ assert((sev_snp->kernel_hashes_offset +
|
||||
+ sizeof(*sev_snp->kernel_hashes_data)) <= len);
|
||||
+ memset(hva, 0, len);
|
||||
+ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data,
|
||||
+ sizeof(*sev_snp->kernel_hashes_data));
|
||||
+ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL;
|
||||
+ }
|
||||
+ return snp_launch_update_data(addr, hva, len, type);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
snp_metadata_desc_to_page_type(int desc_type)
|
||||
{
|
||||
@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
|
||||
|
||||
if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
|
||||
ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
|
||||
+ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) {
|
||||
+ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva,
|
||||
+ desc->len);
|
||||
} else {
|
||||
ret = snp_launch_update_data(desc->base, hva, desc->len, type);
|
||||
}
|
||||
@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht,
|
||||
return true;
|
||||
}
|
||||
|
||||
+static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ /*
|
||||
+ * SNP: Populate the hashes table in an area that later in
|
||||
+ * snp_launch_update_kernel_hashes() will be copied to the guest memory
|
||||
+ * and encrypted.
|
||||
+ */
|
||||
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
|
||||
+ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK;
|
||||
+ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1);
|
||||
+ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp);
|
||||
+}
|
||||
+
|
||||
+static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common,
|
||||
+ SevHashTableDescriptor *area,
|
||||
+ SevKernelLoaderContext *ctx,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ PaddedSevHashTable *padded_ht;
|
||||
+ hwaddr mapped_len = sizeof(*padded_ht);
|
||||
+ MemTxAttrs attrs = { 0 };
|
||||
+ bool ret = true;
|
||||
+
|
||||
+ /*
|
||||
+ * Populate the hashes table in the guest's memory at the OVMF-designated
|
||||
+ * area for the SEV hashes table
|
||||
+ */
|
||||
+ padded_ht = address_space_map(&address_space_memory, area->base,
|
||||
+ &mapped_len, true, attrs);
|
||||
+ if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
|
||||
+ error_setg(errp, "SEV: cannot map hashes table guest memory area");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
|
||||
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
+ sizeof(*padded_ht), errp) < 0) {
|
||||
+ ret = false;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = false;
|
||||
+ }
|
||||
+
|
||||
+ address_space_unmap(&address_space_memory, padded_ht,
|
||||
+ mapped_len, true, mapped_len);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
|
||||
* which is included in SEV's initial memory measurement.
|
||||
@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
{
|
||||
uint8_t *data;
|
||||
SevHashTableDescriptor *area;
|
||||
- PaddedSevHashTable *padded_ht;
|
||||
- hwaddr mapped_len = sizeof(*padded_ht);
|
||||
- MemTxAttrs attrs = { 0 };
|
||||
- bool ret = true;
|
||||
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
|
||||
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
|
||||
|
||||
/*
|
||||
* Only add the kernel hashes if the sev-guest configuration explicitly
|
||||
@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
return false;
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Populate the hashes table in the guest's memory at the OVMF-designated
|
||||
- * area for the SEV hashes table
|
||||
- */
|
||||
- padded_ht = address_space_map(&address_space_memory, area->base,
|
||||
- &mapped_len, true, attrs);
|
||||
- if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
|
||||
- error_setg(errp, "SEV: cannot map hashes table guest memory area");
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
|
||||
- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
|
||||
- sizeof(*padded_ht), errp) < 0) {
|
||||
- ret = false;
|
||||
- }
|
||||
- } else {
|
||||
- ret = false;
|
||||
- }
|
||||
-
|
||||
- address_space_unmap(&address_space_memory, padded_ht,
|
||||
- mapped_len, true, mapped_len);
|
||||
-
|
||||
- return ret;
|
||||
+ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp);
|
||||
}
|
||||
|
||||
static char *
|
||||
@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes;
|
||||
klass->launch_start = sev_launch_start;
|
||||
klass->launch_finish = sev_launch_finish;
|
||||
klass->launch_update_data = sev_launch_update_data;
|
||||
@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
|
||||
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
|
||||
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
|
||||
|
||||
+ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes;
|
||||
klass->launch_start = sev_snp_launch_start;
|
||||
klass->launch_finish = sev_snp_launch_finish;
|
||||
klass->launch_update_data = sev_snp_launch_update_data;
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,268 @@
|
||||
From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Tue, 9 Jul 2024 23:10:05 -0500
|
||||
Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy
|
||||
KVM_SEV*_INIT
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
Currently if the 'legacy-vm-type' property of the sev-guest object is
|
||||
'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
|
||||
interface in conjunction with the newer KVM_X86_SEV_VM and
|
||||
KVM_X86_SEV_ES_VM KVM VM types.
|
||||
|
||||
This can lead to measurement changes if, for instance, an SEV guest was
|
||||
created on a host that originally had an older kernel that didn't
|
||||
support KVM_SEV_INIT2, but is booted on the same host later on after the
|
||||
host kernel was upgraded.
|
||||
|
||||
Instead, if legacy-vm-type is 'off', QEMU should fail if the
|
||||
KVM_SEV_INIT2 interface is not provided by the current host kernel.
|
||||
Modify the fallback handling accordingly.
|
||||
|
||||
In the future, VMSA features and other flags might be added to QEMU
|
||||
which will require legacy-vm-type to be 'off' because they will rely
|
||||
on the newer KVM_SEV_INIT2 interface. It may be difficult to convey to
|
||||
users what values of legacy-vm-type are compatible with which
|
||||
features/options, so as part of this rework, switch legacy-vm-type to a
|
||||
tri-state OnOffAuto option. 'auto' in this case will automatically
|
||||
switch to using the newer KVM_SEV_INIT2, but only if it is required to
|
||||
make use of new VMSA features or other options only available via
|
||||
KVM_SEV_INIT2.
|
||||
|
||||
Defining 'auto' in this way would avoid inadvertantly breaking
|
||||
compatibility with older kernels since it would only be used in cases
|
||||
where users opt into newer features that are only available via
|
||||
KVM_SEV_INIT2 and newer kernels, and provide better default behavior
|
||||
than the legacy-vm-type=off behavior that was previously in place, so
|
||||
make it the default for 9.1+ machine types.
|
||||
|
||||
Cc: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
cc: kvm@vger.kernel.org
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd)
|
||||
|
||||
RHEL: adjust compatiility setting, applying it to 9.4 machine type
|
||||
---
|
||||
hw/i386/pc.c | 2 +-
|
||||
qapi/qom.json | 18 ++++++----
|
||||
target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++--------
|
||||
3 files changed, 83 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index b25d075b59..e9c5ea5d8f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
|
||||
GlobalProperty pc_rhel_9_5_compat[] = {
|
||||
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
|
||||
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
|
||||
- { "sev-guest", "legacy-vm-type", "true" },
|
||||
+ { "sev-guest", "legacy-vm-type", "on" },
|
||||
};
|
||||
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
|
||||
|
||||
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||
index 8bd299265e..17bd5a0cf7 100644
|
||||
--- a/qapi/qom.json
|
||||
+++ b/qapi/qom.json
|
||||
@@ -912,12 +912,16 @@
|
||||
# @handle: SEV firmware handle (default: 0)
|
||||
#
|
||||
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
|
||||
-# The newer KVM_SEV_INIT2 interface syncs additional vCPU
|
||||
-# state when initializing the VMSA structures, which will
|
||||
-# result in a different guest measurement. Set this to
|
||||
-# maintain compatibility with older QEMU or kernel versions
|
||||
-# that rely on legacy KVM_SEV_INIT behavior.
|
||||
-# (default: false) (since 9.1)
|
||||
+# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs
|
||||
+# additional vCPU state when initializing the VMSA structures,
|
||||
+# which will result in a different guest measurement. Set
|
||||
+# this to 'on' to force compatibility with older QEMU or kernel
|
||||
+# versions that rely on legacy KVM_SEV_INIT behavior. 'auto'
|
||||
+# will behave identically to 'on', but will automatically
|
||||
+# switch to using KVM_SEV_INIT2 if the user specifies any
|
||||
+# additional options that require it. If set to 'off', QEMU
|
||||
+# will require KVM_SEV_INIT2 unconditionally.
|
||||
+# (default: off) (since 9.1)
|
||||
#
|
||||
# Since: 2.12
|
||||
##
|
||||
@@ -927,7 +931,7 @@
|
||||
'*session-file': 'str',
|
||||
'*policy': 'uint32',
|
||||
'*handle': 'uint32',
|
||||
- '*legacy-vm-type': 'bool' } }
|
||||
+ '*legacy-vm-type': 'OnOffAuto' } }
|
||||
|
||||
##
|
||||
# @SevSnpGuestProperties:
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 491fab74fd..b921defb63 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -144,7 +144,7 @@ struct SevGuestState {
|
||||
uint32_t policy;
|
||||
char *dh_cert_file;
|
||||
char *session_file;
|
||||
- bool legacy_vm_type;
|
||||
+ OnOffAuto legacy_vm_type;
|
||||
};
|
||||
|
||||
struct SevSnpGuestState {
|
||||
@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * This helper is to examine sev-guest properties and determine if any options
|
||||
+ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated
|
||||
+ * KVM VM types.
|
||||
+ */
|
||||
+static bool sev_init2_required(SevGuestState *sev_guest)
|
||||
+{
|
||||
+ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
{
|
||||
SevCommonState *sev_common = SEV_COMMON(cg);
|
||||
@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ /* These are the only cases where legacy VM types can be used. */
|
||||
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON ||
|
||||
+ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO &&
|
||||
+ !sev_init2_required(sev_guest))) {
|
||||
+ sev_common->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Newer VM types are required, either explicitly via legacy-vm-type=on, or
|
||||
+ * implicitly via legacy-vm-type=auto along with additional sev-guest
|
||||
+ * properties that require the newer VM types.
|
||||
+ */
|
||||
kvm_type = (sev_guest->policy & SEV_POLICY_ES) ?
|
||||
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
|
||||
- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) {
|
||||
- sev_common->kvm_type = kvm_type;
|
||||
- } else {
|
||||
- sev_common->kvm_type = KVM_X86_DEFAULT_VM;
|
||||
+ if (!kvm_is_vm_type_supported(kvm_type)) {
|
||||
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) {
|
||||
+ error_report("SEV: host kernel does not support requested %s VM type, which is required "
|
||||
+ "for the set of options specified. To allow use of the legacy "
|
||||
+ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not "
|
||||
+ "compatible with the legacy VM type, or upgrade your kernel.",
|
||||
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
|
||||
+ } else {
|
||||
+ error_report("SEV: host kernel does not support requested %s VM type. To allow use of "
|
||||
+ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument "
|
||||
+ "must be set to 'on' or 'auto' for the sev-guest object.",
|
||||
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
|
||||
+ }
|
||||
+
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
+ sev_common->kvm_type = kvm_type;
|
||||
out:
|
||||
return sev_common->kvm_type;
|
||||
}
|
||||
@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
}
|
||||
|
||||
trace_kvm_sev_init();
|
||||
- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
|
||||
+ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) {
|
||||
+ case KVM_X86_DEFAULT_VM:
|
||||
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
|
||||
- } else {
|
||||
+ break;
|
||||
+ case KVM_X86_SEV_VM:
|
||||
+ case KVM_X86_SEV_ES_VM:
|
||||
+ case KVM_X86_SNP_VM: {
|
||||
struct kvm_sev_init args = { 0 };
|
||||
|
||||
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
|
||||
+ break;
|
||||
+ }
|
||||
+ default:
|
||||
+ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.",
|
||||
+ __func__);
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp)
|
||||
SEV_GUEST(obj)->session_file = g_strdup(value);
|
||||
}
|
||||
|
||||
-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
|
||||
+static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque,
|
||||
+ Error **errp)
|
||||
{
|
||||
- return SEV_GUEST(obj)->legacy_vm_type;
|
||||
+ SevGuestState *sev_guest = SEV_GUEST(obj);
|
||||
+ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type;
|
||||
+
|
||||
+ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp);
|
||||
}
|
||||
|
||||
-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
|
||||
+static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
|
||||
+ const char *name, void *opaque,
|
||||
+ Error **errp)
|
||||
{
|
||||
- SEV_GUEST(obj)->legacy_vm_type = value;
|
||||
+ SevGuestState *sev_guest = SEV_GUEST(obj);
|
||||
+
|
||||
+ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
sev_guest_set_session_file);
|
||||
object_class_property_set_description(oc, "session-file",
|
||||
"guest owners session parameters (encoded with base64)");
|
||||
- object_class_property_add_bool(oc, "legacy-vm-type",
|
||||
- sev_guest_get_legacy_vm_type,
|
||||
- sev_guest_set_legacy_vm_type);
|
||||
+ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto",
|
||||
+ sev_guest_get_legacy_vm_type,
|
||||
+ sev_guest_set_legacy_vm_type, NULL, NULL);
|
||||
object_class_property_set_description(oc, "legacy-vm-type",
|
||||
"use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
|
||||
}
|
||||
@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj)
|
||||
object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_apply_compat_props(obj);
|
||||
+
|
||||
+ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO;
|
||||
}
|
||||
|
||||
/* guest info specific sev/sev-es */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -0,0 +1,46 @@
|
||||
From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <michael.roth@amd.com>
|
||||
Date: Thu, 30 May 2024 06:16:24 -0500
|
||||
Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for
|
||||
SEV-SNP guests
|
||||
|
||||
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-MergeRequest: 245: SEV-SNP support
|
||||
RH-Jira: RHEL-39544
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm)
|
||||
|
||||
For SEV-SNP guests, launch measurement is queried from within the guest
|
||||
during attestation, so don't attempt to return it as part of
|
||||
query-sev-launch-measure.
|
||||
|
||||
Signed-off-by: Michael Roth <michael.roth@amd.com>
|
||||
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
||||
Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264)
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 6525b3c1a0..c3daaf1ad5 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
|
||||
|
||||
static char *sev_get_launch_measurement(void)
|
||||
{
|
||||
- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
|
||||
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
|
||||
+ SevGuestState *sev_guest =
|
||||
+ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
|
||||
|
||||
if (sev_guest &&
|
||||
SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) {
|
||||
--
|
||||
2.39.3
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue