diff --git a/.gitignore b/.gitignore index ded41b6..49421e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-8.2.0.tar.xz +SOURCES/qemu-9.0.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index 4a22f24..437fb86 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz +6699bb03d6da21159b89668bca01c6c958b95d07 SOURCES/qemu-9.0.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index a63b5c3..49991a2 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From faae70a870156f86a5cf55ca967b15d7612941ff Mon Sep 17 00:00:00 2001 +From ea7dff3dbf979d7d8a85a16cf5187235143e1048 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-8.1.0-5.el9 +This rebase is based on qemu-kvm-8.2.0-11.el9 Signed-off-by: Miroslav Rezanina -- @@ -83,6 +83,12 @@ Rebase changes (8.2.0): - Added --disable-plugins configure option - Fixing frh.py strings +Rebase notes (9.0.0): +- Fixed qemu-kvm binary location change +- Remove hppa-firmware64.img +- Package stp files for utilities +- Download subprojects on local build + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -193,14 +199,17 @@ Merged patches (8.1.0): Merged patches (8.2.0): - cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64 +Merged patches (9.0.0 rc0): +- 25de053dbf spec: Enable zstd + Signed-off-by: Miroslav Rezanina --- - .distro/Makefile | 100 + + .distro/Makefile | 101 + .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4909 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 5170 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .distro/scripts/frh.py | 4 +- @@ -211,7 +220,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 5168 insertions(+), 6 deletions(-) + 16 files changed, 5430 insertions(+), 6 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index e23ec51..61e84a1 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 048067b4618ba1fa7c8c517185d4cd3a675eba72 Mon Sep 17 00:00:00 2001 +From 780c39975b059deaee106775b6e3a240155acea3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 
Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -47,6 +47,12 @@ Rebase notes (8.2.0): - Disable new neoverse-v2 - Removed CONFIG_OPENGL from x86_64 config file +Rebase notes (9.0.0 rc0): +- Split CONFIG_IDE_QDEV to CONFIG_IDE_DEV and CONFIG_IDE_BUS (upstream change) + +Rebase notes (9.0.0 rc1): +- Do not compile armv7 cpu types + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -74,36 +80,41 @@ Merged patches (8.1.0): Merged patches (8.2.0): - b29f66431f Enable igb on x86_64 + +Merged patches (9.0.0 rc0): +- 3889ede5d9 Compile IOMMUFD on x86_64 +- 0beb18451f Compile IOMMUFD on s390x +- 2b4b13f70d Compile IOMMUFD object on aarch64 --- .distro/qemu-kvm.spec.template | 18 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 42 +++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ - .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 110 ++++++++++++++++++ + .../s390x-softmmu/s390x-rh-devices.mak | 19 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 112 ++++++++++++++++++ hw/arm/virt.c | 2 + hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- hw/cxl/meson.build | 3 +- hw/display/cirrus_vga.c | 4 + hw/ide/piix.c | 5 +- - hw/ide/qdev.c | 9 ++ hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- - hw/virtio/meson.build | 5 +- + hw/virtio/meson.build | 6 +- target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu.c | 4 + target/arm/cpu.h | 3 + target/arm/cpu64.c | 12 +- target/arm/tcg/cpu32.c | 2 + target/arm/tcg/cpu64.c | 8 ++ + target/arm/tcg/meson.build | 4 +- target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ tests/qtest/arm-cpu-features.c | 4 + - 28 files changed, 323 insertions(+), 15 deletions(-) + 28 files changed, 321 insertions(+), 
17 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -112,10 +123,10 @@ Merged patches (8.2.0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..aec1831199 +index 0000000000..b0191d3c69 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -126,13 +137,11 @@ index 0000000000..aec1831199 +CONFIG_CXL=y +CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y -+CONFIG_IVSHMEM_DEVICE=y +CONFIG_PCIE_PORT=y +CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y +CONFIG_PFLASH_CFI01=y -+CONFIG_QXL=y +CONFIG_SCSI=y +CONFIG_SEMIHOSTING=y +CONFIG_USB=y @@ -159,6 +168,7 @@ index 0000000000..aec1831199 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 index 0000000000..dbb7d30829 @@ -220,10 +230,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..69a799adbd +index 0000000000..24cf6dbd03 --- /dev/null +++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -0,0 +1,18 @@ +@@ -0,0 +1,19 @@ +include ../rh-virtio.mak + +CONFIG_PCI=y @@ -242,9 +252,10 @@ index 0000000000..69a799adbd +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..ce5be73633 +index 
0000000000..d60ff1bcfc --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak @@ -0,0 +1,112 @@ @@ -285,14 +296,14 @@ index 0000000000..ce5be73633 +CONFIG_IDE_CORE=y +CONFIG_IDE_PCI=y +CONFIG_IDE_PIIX=y -+CONFIG_IDE_QDEV=y ++CONFIG_IDE_DEV=y ++CONFIG_IDE_BUS=y +CONFIG_IGB_PCI_EXPRESS=y +CONFIG_IOAPIC=y +CONFIG_IOH3420=y +CONFIG_ISA_BUS=y +CONFIG_ISA_DEBUG=y +CONFIG_ISA_TESTDEV=y -+CONFIG_IVSHMEM_DEVICE=y +CONFIG_LPC_ICH9=y +CONFIG_MC146818RTC=y +CONFIG_MEM_DEVICE=y @@ -314,7 +325,6 @@ index 0000000000..ce5be73633 +CONFIG_PFLASH_CFI01=y +CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y -+CONFIG_QXL=y +CONFIG_Q35=y +CONFIG_RTL8139_PCI=y +CONFIG_SCSI=y @@ -360,28 +370,29 @@ index 0000000000..ce5be73633 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index be2856c018..af9ea4dd1c 100644 +index a9a913aead..6c6d155002 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -205,6 +205,7 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { +@@ -2954,6 +2954,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + static const char * const valid_cpu_types[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ #ifdef CONFIG_TCG - ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), -@@ -219,6 +220,7 @@ static const char *valid_cpus[] = { - ARM_CPU_TYPE_NAME("neoverse-n2"), - #endif - ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), +@@ -2971,6 +2972,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif /* CONFIG_TCG */ + #ifdef TARGET_AARCH64 + ARM_CPU_TYPE_NAME("cortex-a53"), +#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), + ARM_CPU_TYPE_NAME("cortex-a57"), + #if defined(CONFIG_KVM) || 
defined(CONFIG_HVF) + ARM_CPU_TYPE_NAME("host"), diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index d7cc4d3ec1..12d0a60905 100644 +index 6dd94e98bc..a05757fc9a 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -409,7 +420,7 @@ index d7cc4d3ec1..12d0a60905 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 6d319947ca..91962fd863 100644 +index 38cdcfbe57..e588ecfd42 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ @@ -420,7 +431,7 @@ index 6d319947ca..91962fd863 100644 system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build -index ea0aebf6e3..6878f06974 100644 +index 3e375f61a9..613adb3ebb 100644 --- a/hw/cxl/meson.build +++ b/hw/cxl/meson.build @@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL', @@ -434,7 +445,7 @@ index ea0aebf6e3..6878f06974 100644 if_false: files( 'cxl-host-stubs.c', diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index b80f98b6c4..0370cf8a64 100644 +index 150883a971..497365bd80 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -36,6 +36,7 @@ @@ -456,10 +467,10 @@ index b80f98b6c4..0370cf8a64 100644 * Follow real hardware, cirrus card emulated has 4 MB video memory. * Also accept 8 MB/16 MB for backward compatibility. 
diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 4e5e12935f..03ca06bb17 100644 +index 80efc633d3..9cb82b8eea 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -190,7 +190,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -191,7 +191,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -469,7 +480,7 @@ index 4e5e12935f..03ca06bb17 100644 } static const TypeInfo piix3_ide_info = { -@@ -214,6 +215,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -215,6 +216,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -478,57 +489,11 @@ index 4e5e12935f..03ca06bb17 100644 } static const TypeInfo piix4_ide_info = { -diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c -index 1b3b4da01d..454bfa5783 100644 ---- a/hw/ide/qdev.c -+++ b/hw/ide/qdev.c -@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) - ide_dev_initfn(dev, IDE_CD, errp); - } - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static void ide_cf_realize(IDEDevice *dev, Error **errp) - { - ide_dev_initfn(dev, IDE_CFATA, errp); - } -+#endif - - #define DEFINE_IDE_DEV_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ -@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { - .class_init = ide_cd_class_init, - }; - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static Property ide_cf_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), -@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { - .instance_size = sizeof(IDEDrive), - .class_init = ide_cf_class_init, - }; -+#endif - - static void ide_device_class_init(ObjectClass *klass, void *data) - { -@@ -396,7 +402,10 @@ static void 
ide_register_types(void) - type_register_static(&ide_bus_info); - type_register_static(&ide_hd_info); - type_register_static(&ide_cd_info); -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - type_register_static(&ide_cf_info); -+#endif - type_register_static(&ide_device_type_info); - } - diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index b92b63bedc..3b6235dde6 100644 +index 74f10b640f..2e85ecf476 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -957,6 +957,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -952,6 +952,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; adevc->build_dev_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -538,7 +503,7 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 8ffe1077f1..b3dfeeca4f 100644 +index 43f3a4a701..267f182883 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = { @@ -558,10 +523,10 @@ index 8ffe1077f1..b3dfeeca4f 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 91fae56573..33e0c8724c 100644 +index e7c9edd033..3b0a47a28c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -386,10 +386,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -389,10 +389,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -569,16 +534,16 @@ index 91fae56573..33e0c8724c 100644 DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), - DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5p_v2.1"), +#endif DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), - DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + 
DEFINE_SPAPR_CPU_CORE_TYPE("power7p_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index e94149ebde..4a8adbf3dc 100644 +index aac3bb35f2..5411ff35df 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -52,7 +52,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader +@@ -55,7 +55,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -588,26 +553,34 @@ index e94149ebde..4a8adbf3dc 100644 endif diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build -index c0055a7832..12e1d6c67e 100644 +index d7f18c96e6..aaabbb8b0b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build -@@ -17,8 +17,9 @@ if have_vhost - if have_vhost_user - # fixme - this really should be generic - specific_virtio_ss.add(files('vhost-user.c')) +@@ -20,7 +20,8 @@ if have_vhost + system_virtio_ss.add(files('vhost-user-base.c')) + + # MMIO Stubs - system_virtio_ss.add(files('vhost-user-device.c')) -- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) +# Disabled for 8.2.0 rebase for RHEL 9.4.0 +# system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +@@ -28,7 +29,8 @@ if have_vhost + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) + + # PCI Stubs +- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 +# system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) - endif - if have_vhost_vdpa - 
system_virtio_ss.add(files('vhost-vdpa.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], + if_true: files('vhost-user-gpio-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index b53d5efe13..64989a02d1 100644 +index 3cc8cc738b..6f21fea1f5 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c -@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +@@ -223,6 +223,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, static void arm_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -615,23 +588,23 @@ index b53d5efe13..64989a02d1 100644 CpuDefinitionInfoList **cpu_list = user_data; CpuDefinitionInfo *info; const char *typename; -@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); +@@ -231,6 +232,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info = g_malloc0(sizeof(*info)); + info->name = cpu_model_from_type(typename); info->q_typename = g_strdup(typename); + info->deprecated = !!cc->deprecation_note; QAPI_LIST_PREPEND(*cpu_list, info); } diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index efb22a87f9..a32521ada9 100644 +index ab8d007a86..e5dce20f19 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2524,6 +2524,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) +@@ -2546,6 +2546,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) acc->info = data; cc->gdb_core_xml_file = "arm-core.xml"; -+ ++ + if (acc->info->deprecation_note) { + cc->deprecation_note = acc->info->deprecation_note; + } @@ -639,10 +612,10 @@ index efb22a87f9..a32521ada9 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git 
a/target/arm/cpu.h b/target/arm/cpu.h -index a0282e0d28..7e0f0dfea7 100644 +index bc0c84873f..e9472c8bb8 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h -@@ -34,6 +34,8 @@ +@@ -37,6 +37,8 @@ #define KVM_HAVE_MCE_INJECTION 1 #endif @@ -651,7 +624,7 @@ index a0282e0d28..7e0f0dfea7 100644 #define EXCP_UDEF 1 /* undefined instruction */ #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 -@@ -1120,6 +1122,7 @@ typedef struct ARMCPUInfo { +@@ -1092,6 +1094,7 @@ typedef struct ARMCPUInfo { const char *name; void (*initfn)(Object *obj); void (*class_init)(ObjectClass *oc, void *data); @@ -660,7 +633,7 @@ index a0282e0d28..7e0f0dfea7 100644 /** diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 1e9c6c85ae..10be900803 100644 +index 985b1efe16..46a4e80171 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj) @@ -692,7 +665,7 @@ index 1e9c6c85ae..10be900803 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -815,8 +820,13 @@ static void aarch64_cpu_instance_init(Object *obj) +@@ -814,8 +819,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -707,24 +680,24 @@ index 1e9c6c85ae..10be900803 100644 void aarch64_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c -index d9e0e2a4dd..c5c639a6ea 100644 +index de8f2be941..8896295ae3 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c -@@ -98,6 +98,7 @@ void aa32_max_features(ARMCPU *cpu) +@@ -92,6 +92,7 @@ void aa32_max_features(ARMCPU *cpu) + cpu->isar.id_dfr1 = t; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -1189,3 +1190,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1037,3 +1038,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c -index fcda99e158..bd5a993ff8 100644 +index 9f7a9f3d2c..7ec6851c9c 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -29,6 +29,7 @@ @@ -759,7 +732,7 @@ index fcda99e158..bd5a993ff8 100644 /* * -cpu max: a CPU with as many features enabled as our emulation supports. -@@ -1259,6 +1263,7 @@ void aarch64_max_tcg_initfn(Object *obj) +@@ -1271,6 +1275,7 @@ void aarch64_max_tcg_initfn(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); } @@ -767,7 +740,7 @@ index fcda99e158..bd5a993ff8 100644 static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, -@@ -1270,14 +1275,17 @@ static const ARMCPUInfo aarch64_cpus[] = { +@@ -1282,14 +1287,17 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, }; @@ -785,8 +758,20 @@ index fcda99e158..bd5a993ff8 100644 } type_init(aarch64_cpu_register_types) +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 3b1a9f0fc5..6898b4de6f 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -56,5 +56,5 @@ arm_system_ss.add(files( + 'psci.c', + )) + +-arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) +-arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) 
++#arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) ++#arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 7dbb47de64..69fddb05bc 100644 +index f2301b43f7..f77ebfcc81 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -810,13 +795,13 @@ index 7dbb47de64..69fddb05bc 100644 POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, @@ -718,6 +721,7 @@ "PowerPC 970MP v1.1") - POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + POWERPC_DEF("power5p_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") +#endif POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") - POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -898,12 +902,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -894,13 +898,16 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -826,13 +811,14 @@ index 7dbb47de64..69fddb05bc 100644 { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, - { "power5+", "power5+_v2.1" }, + { "power5+", "power5p_v2.1" }, + { "power5+_v2.1", "power5p_v2.1" }, { "power5gs", "power5+_v2.1" }, +#endif { "power7", "power7_v2.3" }, - { "power7+", "power7+_v2.1" }, - { "power8e", "power8e_v2.1" }, -@@ -913,12 +920,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power7+", "power7p_v2.1" }, + { "power7+_v2.1", "power7p_v2.1" }, +@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -848,10 +834,10 @@ index 7dbb47de64..69fddb05bc 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 63981bf36b..87a4480c05 100644 +index 2d99218069..0728bfcc20 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -35,6 
+35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -34,6 +34,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -862,10 +848,10 @@ index 63981bf36b..87a4480c05 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 33ab3551f4..912e493951 100644 +index 4ce809c5d4..55fb4855b1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2567,6 +2567,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2565,6 +2565,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -881,10 +867,10 @@ index 33ab3551f4..912e493951 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index a8a4c668ad..2458cc527c 100644 +index 9d6e6190d5..f822526acb 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c -@@ -451,8 +451,10 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -452,8 +452,10 @@ static void test_query_cpu_model_expansion(const void *data) assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); /* Test expected feature presence/absence for some cpu types */ @@ -895,7 +881,7 @@ index a8a4c668ad..2458cc527c 100644 /* Enabling and disabling pmu should always work. 
*/ assert_has_feature_enabled(qts, "max", "pmu"); -@@ -469,6 +471,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -470,6 +472,7 @@ static void test_query_cpu_model_expansion(const void *data) assert_has_feature_enabled(qts, "cortex-a57", "pmu"); assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); @@ -903,7 +889,7 @@ index a8a4c668ad..2458cc527c 100644 assert_has_feature_enabled(qts, "a64fx", "pmu"); assert_has_feature_enabled(qts, "a64fx", "aarch64"); /* -@@ -481,6 +484,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -482,6 +485,7 @@ static void test_query_cpu_model_expansion(const void *data) "{ 'sve384': true }"); assert_error(qts, "a64fx", "cannot enable sve640", "{ 'sve640': true }"); diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index 4a4c6fb..e0c3795 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From d9ff466c980d219ebf230ea24becce294c196f1f Mon Sep 17 00:00:00 2001 +From 8e6a30073f9c1a5d6294b2d16556522453e227e7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -26,6 +26,12 @@ Rebase notes (7.1.0): Rebase notes (8.1.0): - Do not modify unused vga-isa.c +Rebase notes (9.0.0 rc0): +- Updated smsbios handling + +Rebase notes (9.0.0 rc4): +- Moving downstream compat changes + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -61,24 +67,27 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. 
(partial) + +Merged patches (9.0.0 rc0): +- 4b8fe42abc virtio-mem: default-enable "dynamic-memslots" --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 267 +++++++++++++++++++++++++++++++++++ - hw/i386/pc_piix.c | 2 + - hw/i386/pc_q35.c | 2 + + hw/core/machine.c | 269 +++++++++++++++++++++++++++++++++++ + hw/i386/fw_cfg.c | 3 +- hw/net/rtl8139.c | 4 +- hw/smbios/smbios.c | 46 +++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-xhci-pci.c | 59 ++++++-- hw/usb/hcd-xhci-pci.h | 1 + + hw/virtio/virtio-mem.c | 3 +- include/hw/boards.h | 40 ++++++ - include/hw/firmware/smbios.h | 5 +- + include/hw/firmware/smbios.h | 4 +- include/hw/i386/pc.h | 3 + - 13 files changed, 413 insertions(+), 22 deletions(-) + 13 files changed, 414 insertions(+), 24 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index dd523d2e4c..5050c0ba97 100644 +index debe1adb84..e8ddcd716e 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) @@ -88,28 +97,28 @@ index dd523d2e4c..5050c0ba97 100644 - .minimum_version_id = 3, + .minimum_version_id = 2, .post_load = vmstate_acpi_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index af9ea4dd1c..62f0f7d4d6 100644 +index 6c6d155002..36e9b4b4e9 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1638,7 +1638,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1651,7 +1651,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_TYPE_64); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, +- true); ++ true, NULL, NULL); /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0c17398141..446601ee30 100644 +index 37ede0e7d4..695cb89a46 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -57,6 +57,273 @@ GlobalProperty hw_compat_7_2[] = { +@@ -296,6 +296,275 @@ GlobalProperty hw_compat_2_1[] = { }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -132,6 +141,8 @@ index 0c17398141..446601ee30 100644 + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, + /* hw_compat_rhel_9_4 from hw_compat_8_1 */ + { "igb", "x-pcie-flr-init", "off" }, ++ /* hw_compat_rhel_9_4 jira RHEL-24045 */ ++ { "virtio-mem", "dynamic-memslots", "off" }, +}; +const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + @@ -378,37 +389,25 @@ index 0c17398141..446601ee30 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_7_1[] = { - { "virtio-device", "queue_reset", "false" }, - { "virtio-rng-pci", "vectors", "0" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index eace854335..2a9f465619 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -238,6 +238,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("QEMU", mc->desc, - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); - } + MachineState *current_machine; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 4f3e5412f6..912cb0c0dc 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -206,6 +206,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("QEMU", mc->desc, - mc->name, 
pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); + static char *machine_get_kernel(Object *obj, Error **errp) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index d802d2787f..c7aa39a13e 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -64,7 +64,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", mc->desc, mc->name, +- pcmc->smbios_uuid_encoded); ++ pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, pcmc->smbios_stream_version); } + /* tell smbios about cpuid version and features */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 4af8c66266..7dc12907ab 100644 +index 897c86ec41..2d0db43f49 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque) @@ -431,20 +430,21 @@ index 4af8c66266..7dc12907ab 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 2a90601ac5..7bde23e59d 100644 +index eed5787b15..68608a3403 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -58,6 +58,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ +@@ -39,6 +39,10 @@ size_t usr_blobs_len; + static unsigned usr_table_max; + static unsigned usr_table_cnt; +/* Set to true for modern Windows 10 HardwareID-6 compat */ +static bool smbios_type2_required; + - ++ uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) + unsigned smbios_table_max; +@@ -629,7 +633,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -453,21 +453,17 @@ index 2a90601ac5..7bde23e59d 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, 
type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -985,7 +988,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -1018,16 +1022,52 @@ void smbios_set_default_processor_family(uint16_t processor_family) void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) + const char *version, +- bool uuid_encoded) + bool uuid_encoded, + const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) ++ const char *stream_version) { smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -1006,11 +1012,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } + smbios_uuid_encoded = uuid_encoded; + /* + * If @stream_product & @stream_version are non-NULL, then @@ -494,12 +490,12 @@ index 2a90601ac5..7bde23e59d 100644 + * + * We get 'System Manufacturer' and 'Baseboard Manufacturer' + */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); -+ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(smbios_type1.product, product); + SMBIOS_SET_DEFAULT(smbios_type1.version, version); ++ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); + if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); + } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); @@ -513,20 +509,20 @@ index 2a90601ac5..7bde23e59d 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index b25da448c8..0331e84398 100644 +index 
28fdabc321..bad13ec224 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { - VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), + VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index 643d4643e4..529bad9366 100644 +index 4423983308..43b4b71fdf 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -636,11 +632,26 @@ index 08f70ce97c..1be7527c1b 100644 } XHCIPciState; #endif +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index ffd119ebac..0e2be2219c 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1694,8 +1694,9 @@ static Property virtio_mem_properties[] = { + #endif + DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, + early_migration, true), ++ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, +- dynamic_memslots, false), ++ dynamic_memslots, true), + DEFINE_PROP_END_OF_LIST(), + }; + diff --git a/include/hw/boards.h b/include/hw/boards.h -index da85f86efb..4a21eddbf9 100644 +index 8b8f6d5c00..0466f9d0f3 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -503,4 +503,44 @@ extern const size_t hw_compat_2_2_len; +@@ -512,4 +512,44 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -686,26 +697,25 @@ index da85f86efb..4a21eddbf9 100644 +extern const char 
*rhel_old_machine_deprecation; #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 7f3259a630..d24b3ccd32 100644 +index 8d3fb2fb3b..d9d6d7a169 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -294,7 +294,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -332,7 +332,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); + const char *version, +- bool uuid_encoded); + bool uuid_encoded, + const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); ++ const char *stream_version); + void smbios_set_default_processor_family(uint16_t processor_family); + uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index a10ceeabbf..037942d233 100644 +index 27a68071d7..ebd8f973f2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -113,6 +113,9 @@ struct PCMachineClass { +@@ -112,6 +112,9 @@ struct PCMachineClass { bool smbios_legacy_mode; bool smbios_uuid_encoded; SmbiosEntryPointType default_smbios_ep_type; diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index fde7982..a556bb2 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 23f614ab0b79ec1c6f65a7f0d6993bfdfc53fd23 Mon Sep 17 00:00:00 2001 +From cf398296f3fcee185a00f23de5deae57c97d648e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -30,6 +30,9 
@@ Rebase notes (8.0.0): Rebase notes (8.1.0): - Added setting default_nic +Rebase notes (9.0.0 rc0): +- call arm_virt_compat_set on rhel type class_init + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -64,34 +67,67 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. (partial) + +Merged patches (9.0.0 rc0): +- 117068376a hw/arm/virt: Fix compats +- 8bcccfabc4 hw/arm/virt: Add properties to disable high memory regions +- 0005a8b93a hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types --- - hw/arm/virt.c | 250 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 299 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 257 insertions(+), 1 deletion(-) + 2 files changed, 306 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 62f0f7d4d6..c541efee5e 100644 +index 36e9b4b4e9..22bc345137 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -82,6 +82,7 @@ - #include "hw/char/pl011.h" - #include "qemu/guest-random.h" +@@ -101,6 +101,7 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -108,7 +109,48 @@ +@@ -128,7 +129,63 @@ static void arm_virt_compat_set(MachineClass *mc) DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +#endif /* disabled for RHEL */ ++ ++/* ++ * This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. They may be overriden by older machine compats. 
++ * ++ * virtio-net-pci variant romfiles are not needed because edk2 does ++ * fully support the pxe boot. Besides virtio romfiles are not shipped ++ * on rhel/aarch64. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ {"virtio-net-pci", "romfile", "" }, ++ {"virtio-net-pci-transitional", "romfile", "" }, ++ {"virtio-net-pci-non-transitional", "romfile", "" }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); ++/* ++ * This cannot be called from the rhel_virt_class_init() because ++ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() ++ * only is called on virt-rhelm.n.s non abstract class init. ++ */ ++static void arm_rhel_compat_set(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, ++ arm_rhel_compat_len); ++} ++ +#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ ++ arm_rhel_compat_set(mc); \ + rhel##m##n##s##_virt_options(mc); \ + mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ + if (latest) { \ @@ -114,23 +150,10 @@ index 62f0f7d4d6..c541efee5e 100644 + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) +#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) -+ -+/* This variable is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. 
-+ */ -+GlobalProperty arm_rhel_compat[] = { -+ { -+ .driver = "virtio-net-pci", -+ .property = "romfile", -+ .value = "", -+ }, -+}; -+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2341,6 +2383,7 @@ static void machvirt_init(MachineState *machine) +@@ -2355,6 +2412,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -138,7 +161,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2368,6 +2411,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2382,6 +2440,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -146,7 +169,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2383,6 +2427,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) +@@ -2397,6 +2456,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } @@ -154,16 +177,23 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2438,7 +2483,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +@@ -2410,6 +2470,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - vms->highmem_mmio = value; + vms->highmem_compact = value; } -- +#endif /* disabled for RHEL */ + static bool virt_get_highmem_redists(Object *obj, Error **errp) + { +@@ -2453,7 +2514,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + vms->highmem_mmio = value; + } + +- static bool virt_get_its(Object *obj, Error **errp) { -@@ -2454,6 +2499,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + VirtMachineState *vms = 
VIRT_MACHINE(obj); +@@ -2468,6 +2528,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -171,7 +201,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2467,6 +2513,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2481,6 +2542,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -179,7 +209,7 @@ index 62f0f7d4d6..c541efee5e 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2550,6 +2597,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2564,6 +2626,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -187,7 +217,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2563,6 +2611,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2577,6 +2640,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -195,7 +225,7 @@ index 62f0f7d4d6..c541efee5e 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2935,6 +2984,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2949,6 +3013,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -203,7 +233,7 @@ index 62f0f7d4d6..c541efee5e 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3405,3 +3455,201 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3463,3 +3528,235 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -213,6 +243,7 @@ index 62f0f7d4d6..c541efee5e 100644 +{ + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; @@ -258,6 +289,28 @@ index 62f0f7d4d6..c541efee5e 100644 + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", @@ -382,14 +435,24 @@ index 62f0f7d4d6..c541efee5e 100644 +} +type_init(rhel_machine_init); + ++static void rhel940_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) ++ +static void rhel920_virt_options(MachineClass *mc) +{ -+ compat_props_add(mc->compat_props, 
arm_rhel_compat, arm_rhel_compat_len); ++ rhel940_virt_options(mc); ++ + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ ++ /* RHEL 9.4 is the first supported release */ ++ mc->deprecation_reason = ++ "machine types for versions prior to 9.4 are deprecated"; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++DEFINE_RHEL_MACHINE(9, 2, 0) + +static void rhel900_virt_options(MachineClass *mc) +{ @@ -398,6 +461,7 @@ index 62f0f7d4d6..c541efee5e 100644 + rhel920_virt_options(mc); + + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; @@ -406,10 +470,10 @@ index 62f0f7d4d6..c541efee5e 100644 +} +DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index f69239850e..7b8abe5645 100644 +index bb486d36b1..237fc77bda 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -177,9 +177,17 @@ struct VirtMachineState { +@@ -179,9 +179,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index a269adb..87fcb3a 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From d03cff85f5f1b69b1a66011ebaa974ece81d31bc Mon Sep 17 00:00:00 2001 +From fb905dbe5b51ed899062ef99a2dd7f238d3e3384 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,20 +34,20 @@ Merged patches (7.1.0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index df09aa9d6a..ff459e1a46 100644 +index e9bc97fee0..a258d81846 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1689,6 +1689,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) - +@@ -1718,6 +1718,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { + kvmppc_svm_allow(&error_fatal); + } first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3397,6 +3400,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3421,6 +3424,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3475,6 +3492,12 @@ static void spapr_instance_init(Object *obj) +@@ -3499,6 +3516,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index df09aa9d6a..ff459e1a46 100644 } static void 
spapr_machine_finalizefn(Object *obj) -@@ -4734,6 +4757,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4754,6 +4777,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index df09aa9d6a..ff459e1a46 100644 } static const TypeInfo spapr_machine_info = { -@@ -4785,6 +4809,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4805,6 +4829,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-8.2 + * pseries-9.0 */ -@@ -4967,6 +4992,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4998,6 +5023,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index df09aa9d6a..ff459e1a46 100644 /* * pseries-4.0 -@@ -4982,6 +5008,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -5013,6 +5039,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, } return true; } @@ -114,7 +114,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5306,6 +5334,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5338,6 +5366,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -337,7 +337,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 33e0c8724c..9d01663f43 100644 +index 3b0a47a28c..375e0c8e45 100644 --- a/hw/ppc/spapr_cpu_core.c +++ 
b/hw/ppc/spapr_cpu_core.c @@ -25,6 +25,7 @@ @@ -348,7 +348,7 @@ index 33e0c8724c..9d01663f43 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -261,6 +262,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -264,6 +265,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -356,7 +356,7 @@ index 33e0c8724c..9d01663f43 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -277,6 +279,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -280,6 +282,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -375,10 +375,10 @@ index 33e0c8724c..9d01663f43 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index e91791a1a9..1951d8a2a0 100644 +index 4aaf23d28f..3233c54d11 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -154,6 +154,7 @@ struct SpaprMachineClass { +@@ -157,6 +157,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index e91791a1a9..1951d8a2a0 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -256,6 +257,9 @@ struct SpaprMachineState { +@@ -259,6 +260,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -422,10 +422,10 @@ index ebef2cccec..ff2c00c60e 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 69fddb05bc..64a05aaef3 100644 +index f77ebfcc81..18e9422006 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -748,6 +748,7 @@ +@@ -744,6 +744,7 @@ /* PowerPC CPU 
aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -434,10 +434,10 @@ index 69fddb05bc..64a05aaef3 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index f8101ffa29..e799a2bee6 100644 +index 67e6b2effd..11187aeb93 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1635,6 +1635,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1655,6 +1655,7 @@ static inline int ppc_env_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,7 +446,7 @@ index f8101ffa29..e799a2bee6 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 9b1abe2fc4..56f1c46e8e 100644 +index 8231feb2d4..59f640cf7b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_large_decr; @@ -465,7 +465,7 @@ index 9b1abe2fc4..56f1c46e8e 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2579,6 +2581,16 @@ bool kvmppc_supports_ail_3(void) +@@ -2564,6 +2566,16 @@ bool kvmppc_supports_ail_3(void) return cap_ail_mode_3; } @@ -482,7 +482,7 @@ index 9b1abe2fc4..56f1c46e8e 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2979,3 +2991,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2964,3 +2976,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index c3b9936..b9709f1 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 3623043d4a923bf9f541d439c76e7874cf0fa81d Mon Sep 17 00:00:00 2001 +From 04178c77cfe188b4eed9c08a0bf66842e61fe5dc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 
13:47:32 +0200 Subject: Add s390x machine types @@ -49,18 +49,18 @@ Merged patches (8.2.0): 4 files changed, 174 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 7262725d2e..984891b82a 100644 +index b1dcb3857f..ff753a29e0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -855,6 +855,7 @@ bool css_migration_enabled(void) +@@ -859,6 +859,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_8_2_instance_options(MachineState *machine) + static void ccw_machine_9_0_instance_options(MachineState *machine) { } -@@ -1256,6 +1257,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1272,6 +1273,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -226,7 +226,7 @@ index 7262725d2e..984891b82a 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index a63d990e4e..198b81f2c0 100644 +index 8ed3bb6a27..370b3b3065 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -46,6 +46,9 @@ @@ -239,7 +239,7 @@ index a63d990e4e..198b81f2c0 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -856,22 +859,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -866,22 +869,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -284,10 +284,10 @@ index d7b8912989..1a806a97c4 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c 
b/target/s390x/cpu_models_sysemu.c -index 87a4480c05..28c1b0486c 100644 +index 0728bfcc20..ca2e5d91e2 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -59,6 +59,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) CpuDefinitionInfo *info; char *name = g_strdup(object_class_get_name(klass)); S390CPUClass *scc = S390_CPU_CLASS(klass); @@ -295,7 +295,7 @@ index 87a4480c05..28c1b0486c 100644 /* strip off the -s390x-cpu */ g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; -@@ -69,6 +70,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -68,6 +69,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) info->migration_safe = scc->is_migration_safe; info->q_static = scc->is_static; info->q_typename = g_strdup(object_class_get_name(klass)); diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index d24bb57..83ee4a3 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From b432505cb28bc3b9b0c1849210ac6c63bca3fe37 Mon Sep 17 00:00:00 2001 +From 3c88acb005806ad2386ab6c94a8831151f624738 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -57,23 +57,40 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. 
(partial) - 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0 + +Merged patches (9.0.0 rc0): +- 9149e2bc8f x86: rhel 9.2.0 machine type compat fix --- + hw/i386/fw_cfg.c | 2 +- hw/i386/pc.c | 159 ++++++++++++++++++++- - hw/i386/pc_piix.c | 112 ++++++++++++++- - hw/i386/pc_q35.c | 285 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 109 ++++++++++++++ + hw/i386/pc_q35.c | 285 +++++++++++++++++++++++++++++++++++++ include/hw/boards.h | 2 + include/hw/i386/pc.h | 33 +++++ target/i386/cpu.c | 21 +++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files changed, 615 insertions(+), 7 deletions(-) + 10 files changed, 617 insertions(+), 4 deletions(-) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index c7aa39a13e..283c3f4c16 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, pcmc->smbios_stream_version); + } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 29b9964733..a1faa9e92c 100644 +index 5c21b0c4db..4a154c1a9a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -323,6 +323,161 @@ GlobalProperty pc_compat_2_0[] = { +@@ -326,6 +326,161 @@ GlobalProperty pc_compat_2_0[] = { }; const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); @@ -235,15 +252,15 @@ index 29b9964733..a1faa9e92c 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1826,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->kvmclock_create_always = true; +@@ -1813,6 +1968,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->resizable_acpi_blob = true; + x86mc->apic_xrupt_override = true; 
assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1836,7 +1992,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1823,7 +1979,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -254,10 +271,10 @@ index 29b9964733..a1faa9e92c 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 2a9f465619..44038391fb 100644 +index 18ba076609..a647262d63 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ +@@ -52,6 +52,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -265,18 +282,7 @@ index 2a9f465619..44038391fb 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -235,8 +236,8 @@ static void pc_init1(MachineState *machine, - if (pcmc->smbios_defaults) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", mc->desc, -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -453,6 +454,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) +@@ -422,6 +423,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -284,7 +290,7 @@ index 2a9f465619..44038391fb 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -970,3 +972,109 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -951,3 +953,110 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -314,8 +320,7 @@ index 2a9f465619..44038391fb 100644 + +static void pc_init_rhel760(MachineState *machine) +{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); ++ pc_init1(machine, TYPE_I440FX_PCI_DEVICE); +} + +static void pc_machine_rhel760_options(MachineClass *m) @@ -339,6 +344,8 @@ index 2a9f465619..44038391fb 100644 + pcmc->enforce_amd_1tb_hole = false; + /* From pc_i440fx_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_i440fx_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + /* Introduced in QEMU 8.2 */ + pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; + @@ -395,21 +402,10 @@ index 2a9f465619..44038391fb 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 912cb0c0dc..6387df97c8 100644 +index c7bc8a2041..e872dc7e46 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -203,8 +203,8 @@ static void pc_q35_init(MachineState *machine) - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", mc->desc, -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -363,6 +363,7 @@ static void pc_q35_init(MachineState *machine) +@@ -341,6 +341,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, 
optionfn) @@ -417,7 +413,7 @@ index 912cb0c0dc..6387df97c8 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -699,3 +700,283 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -693,3 +694,287 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -444,6 +440,8 @@ index 912cb0c0dc..6387df97c8 100644 + m->alias = "q35"; + m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ compat_props_add(m->compat_props, ++ pc_q35_compat_defaults, pc_q35_compat_defaults_len); +} + +static void pc_q35_init_rhel940(MachineState *machine) @@ -480,6 +478,8 @@ index 912cb0c0dc..6387df97c8 100644 + + /* From pc_q35_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_q35_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); @@ -702,10 +702,10 @@ index 912cb0c0dc..6387df97c8 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 4a21eddbf9..4edfdb0ddb 100644 +index 0466f9d0f3..46b8725c41 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -277,6 +277,8 @@ struct MachineClass { +@@ -283,6 +283,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -715,12 +715,12 @@ index 4a21eddbf9..4edfdb0ddb 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 037942d233..37644ede7e 100644 +index ebd8f973f2..a984c951ad 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -314,6 +314,39 @@ extern const size_t pc_compat_1_4_len; - - int pc_machine_kvm_type(MachineState 
*machine, const char *vm_type); +@@ -291,6 +291,39 @@ extern const size_t pc_compat_2_1_len; + extern GlobalProperty pc_compat_2_0[]; + extern const size_t pc_compat_2_0_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; @@ -759,7 +759,7 @@ index 037942d233..37644ede7e 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index cd16cb893d..93203d9b91 100644 +index 33760a2ee1..be7b0663cd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = { @@ -925,10 +925,10 @@ index 9c791b7b05..b91af5051f 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 4ce80555b4..9d41edf01e 100644 +index e68cbe9293..739f33db47 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3711,6 +3711,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3715,6 +3715,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -936,7 +936,7 @@ index 4ce80555b4..9d41edf01e 100644 kvm_msr_buf_reset(cpu); -@@ -4065,6 +4066,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4069,6 +4070,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index 54015c0..502bc67 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 66a0510405e5142a1f9e38e0770aa0f10aed3e03 Mon Sep 17 00:00:00 2001 +From 5768cf6811842e5c59da3b752f60659a9d6b5ba1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -63,10 +63,10 @@ Merged patches (8.1.0): 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py 
-index c37afa662c..61c95a2198 100644 +index 10d99403a4..c3422ea1e4 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py -@@ -153,7 +153,7 @@ def test_aarch64_virt(self): +@@ -166,7 +166,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -76,7 +76,7 @@ index c37afa662c..61c95a2198 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index 4cce5a5598..e9248a04a2 100644 +index 92855a02a5..87822074b6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py @@ -230,7 +230,7 @@ def test_aarch64_virt(self): @@ -120,7 +120,7 @@ index 15fd87b2c1..f0d9d89c93 100644 kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build -index 53847cb98f..a2abdb650e 100644 +index fad340ad59..3c0d5241f6 100644 --- a/tests/qemu-iotests/meson.build +++ b/tests/qemu-iotests/meson.build @@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats @@ -163,7 +163,7 @@ index 53847cb98f..a2abdb650e 100644 +# endforeach endforeach diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py -index 3ff38f2661..cab9a2bd6c 100644 +index 588f30a4f1..3929a3634f 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, @@ -216,7 +216,7 @@ index 663bb6c485..2efc43e3f7 100644 "-device intel-hda,id=" HDA_ID CODEC_DEVICES); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 90aae42a22..9bc4e41af0 100644 +index 3aed6efcb8..119613237e 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -44,7 +44,7 @@ libqos_srcs = files( @@ -242,10 +242,10 @@ index 
8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 47dabf91d0..0bdfa3a821 100644 +index 36c5c13a7b..a2887d6057 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -97,7 +97,6 @@ qtests_i386 = \ +@@ -101,7 +101,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -254,7 +254,7 @@ index 47dabf91d0..0bdfa3a821 100644 'migration-test', 'test-x86-cpuid-compat', diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 0d40bc1f2d..4c633c1584 100644 +index 73dfabc272..a9dd304781 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -26,6 +26,7 @@ diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 8222efd..e8bf13a 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From a27cfa0b407bd806ce389a7c69d0130bcfd35244 Mon Sep 17 00:00:00 2001 +From e06a905d726fc20ea6bd95dff1bd0ffe97ebb202 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -27,7 +27,7 @@ to 64 as some usecases require more than 32 devices. 
Signed-off-by: Bandan Das -Rebase changes (231025): +Rebase changes (8.2.0): - Update to upstream changes --- hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- @@ -35,10 +35,10 @@ Rebase changes (231025): 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c62c02f7b6..ec98080f28 100644 +index 64780d1b79..57ac63c10c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -48,6 +48,9 @@ +@@ -50,6 +50,9 @@ /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -48,15 +48,14 @@ index c62c02f7b6..ec98080f28 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -3076,14 +3079,37 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - { +@@ -2946,13 +2949,36 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; + VFIODevice *vbasedev_iter; + VFIOGroup *group; char *tmp, *subsys; Error *err = NULL; - struct stat st; - int i, ret; + int ret, i = 0; bool is_mdev; @@ -84,10 +83,10 @@ index c62c02f7b6..ec98080f28 100644 + return; + } + - if (!vbasedev->sysfsdev) { + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3501,6 +3527,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3370,6 +3396,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,7 +97,7 @@ index c62c02f7b6..ec98080f28 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index fba8737ab2..eb74d9de2d 100644 +index 6e64a2654e..b7de39c010 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ 
-142,6 +142,7 @@ struct VFIOPCIDevice { diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index bc5d9b4..0644440 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 424f14d123fe1043518758605d94ed5ba50e52ad Mon Sep 17 00:00:00 2001 +From b467dc6a24ef41fa574260429807711f6802a54d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/system/vl.c b/system/vl.c -index 2bcd9efb9a..93635ffc5b 100644 +index c644222982..03c3b0aa94 100644 --- a/system/vl.c +++ b/system/vl.c -@@ -870,9 +870,17 @@ static void version(void) +@@ -869,9 +869,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 2bcd9efb9a..93635ffc5b 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -898,6 +906,7 @@ static void help(int exitcode) +@@ -897,6 +905,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 7fa10b5..04adb4a 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From c683ff4a770b77dbe707413840918a46f67fa825 Mon Sep 17 00:00:00 2001 +From 20cc3a6d9bce3e40d165f865b5e398c300cae7bf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 42fd09e4de..557118cb1f 100644 +index 8ce85d4559..4fc27ee2e2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3469,11 +3469,11 @@ SRST +@@ -3493,11 +3493,11 @@ SRST :: diff --git a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 667d431..8518918 100644 --- a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 776bff1be5e98982a9bbc8345ff27274ff5b8c0f Mon Sep 17 00:00:00 2001 +From 2f9fdd21ecf2810d0d83a8125ce0cc1e75dbb13a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,7 +44,7 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 13e032bd5e..7968735346 100644 +index 956128b409..0e8b2f7518 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/SOURCES/0016-Add-upstream-compatibility-bits.patch b/SOURCES/0016-Add-upstream-compatibility-bits.patch new file mode 100644 index 0000000..3efa22c --- /dev/null +++ b/SOURCES/0016-Add-upstream-compatibility-bits.patch @@ -0,0 +1,121 @@ +From 59470e8ab849f22b407f55292e540e16a8cad01a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 20 Mar 2024 05:34:32 -0400 +Subject: Add upstream compatibility bits + +Adding new compats structure for changes introduced during rebase to QEMU 9.0.0. 
+ +Signed-off-by: Miroslav Rezanina + +--- + +Rebase notes (9.0.0 rc2): +- Add aw-bits setting for aarch compat record (overwritten for 9.4 and older) +--- + hw/arm/virt.c | 3 +++ + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 3 ++- + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 22bc345137..f1af9495c6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -144,6 +144,8 @@ GlobalProperty arm_rhel_compat[] = { + {"virtio-net-pci", "romfile", "" }, + {"virtio-net-pci-transitional", "romfile", "" }, + {"virtio-net-pci-non-transitional", "romfile", "" }, ++ /* arm_rhel_compat from arm_virt_compat, added for 9.0.0 rebase */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; + const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + +@@ -3728,6 +3730,7 @@ type_init(rhel_machine_init); + + static void rhel940_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 695cb89a46..0f256d9633 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -302,6 +302,16 @@ const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_5[] = { ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { "migration", "zero-page-detection", "legacy"}, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++}; ++const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); ++ + GlobalProperty hw_compat_rhel_9_4[] = { + /* hw_compat_rhel_9_4 from hw_compat_8_0 */ + { TYPE_VIRTIO_NET, 
"host_uso", "off"}, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index a647262d63..6b260682eb 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,7 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + +- ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_3, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e872dc7e46..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -733,6 +733,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + } + + DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ff753a29e0..9ad54682c6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1282,6 +1282,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine) + + static void ccw_machine_rhel940_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 46b8725c41..cca62f906b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -514,6 +514,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_5[]; ++extern const size_t hw_compat_rhel_9_5_len; ++ + extern GlobalProperty 
hw_compat_rhel_9_4[]; + extern const size_t hw_compat_rhel_9_4_len; + +-- +2.39.3 + diff --git a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch deleted file mode 100644 index 4e62baa..0000000 --- a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 14 Dec 2023 04:42:01 -0500 -Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64 - -Jira: https://issues.redhat.com/browse/RHEL-17168 - -Adding new machine type to support enabling new features. - -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c541efee5e..0b17c94ad7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - -+static void rhel940_virt_options(MachineClass *mc) -+{ -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) -+ - static void rhel920_virt_options(MachineClass *mc) - { -+ rhel940_virt_options(mc); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) -+DEFINE_RHEL_MACHINE(9, 2, 0) - - static void rhel900_virt_options(MachineClass *mc) - { --- -2.39.3 - diff --git a/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..5befe68 --- /dev/null +++ b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,30 @@ +From 
ba574acacf679850e337ec2d5e7836b8277cf393 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 18 Apr 2024 15:04:28 +0200 +Subject: x86: rhel 9.4.0 machine type compat fix + +Fix up the compatibility for 9.4.0. Ensure that pc-q35-rhel9.4.0 +still uses SMBIOS 3.X by default. + +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..2f11f9af7d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; ++ + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch deleted file mode 100644 index ed776c0..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:26:19 -0500 -Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on aarch64 to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index aec1831199..b0191d3c69 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -39,3 +39,4 @@ CONFIG_PXB=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch deleted file mode 100644 index 9a98477..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch +++ /dev/null @@ -1,37 +0,0 @@ -From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:33:17 -0500 -Subject: [PATCH 067/101] Compile IOMMUFD on s390x -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on s390x to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -index 69a799adbd..24cf6dbd03 100644 ---- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak -+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch deleted file mode 100644 index a3eb40e..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch +++ /dev/null @@ -1,37 +0,0 @@ -From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:34:44 -0500 -Subject: [PATCH 068/101] Compile IOMMUFD on x86_64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on s390x to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index ce5be73633..ba41108e0c 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -108,3 +108,4 @@ CONFIG_SGX=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch b/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch deleted file mode 100644 index fdac70c..0000000 --- a/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch +++ /dev/null @@ -1,128 +0,0 @@ -From ef212eb3026a2460f6502a76afa3baedeb74d975 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Tue, 6 Aug 2024 14:14:27 +0400 -Subject: [PATCH 1/5] Fix scanout version with pc-q35-rhel9.4.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 383: Fix scanout version with pc-q35-rhel9.4.0 -RH-Jira: RHEL-53565 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] ed2860cfb933654b0046c1b59f78d2e50146bd6c - -Resolves: https://issues.redhat.com/browse/RHEL-52940 - -Introduce hw_compat_rhel_9_4_extra so that pc-q35-rhel9.4.0 has -x-scanout-vmstate-version=1 - -Signed-off-by: Marc-André Lureau ---- - hw/arm/virt.c | 1 + - hw/core/machine.c | 13 +++++++++++-- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 3 +++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 6 files changed, 21 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e4a66affcb..851eff6b28 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3670,6 +3670,7 @@ type_init(rhel_machine_init); - - static void 
rhel940_virt_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index c8c460c916..fe0a28ef3b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -64,6 +64,17 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+/* -+ * rhel_9_4_extra is used for <= 9.4 machines. -+ * -+ * (for compatibility and historical reasons, rhel_9_4 is used for <= 9.2 too) -+ */ -+GlobalProperty hw_compat_rhel_9_4_extra[] = { -+ /* hw_compat_rhel_9_4_extra from hw_compat_8_2 */ -+ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, -+}; -+const size_t hw_compat_rhel_9_4_extra_len = G_N_ELEMENTS(hw_compat_rhel_9_4_extra); -+ - GlobalProperty hw_compat_rhel_9_4[] = { - /* hw_compat_rhel_9_4 from hw_compat_8_0 */ - { TYPE_VIRTIO_NET, "host_uso", "off"}, -@@ -81,8 +92,6 @@ GlobalProperty hw_compat_rhel_9_4[] = { - { "igb", "x-pcie-flr-init", "off" }, - /* hw_compat_rhel_9_4 jira RHEL-24045 */ - { "virtio-mem", "dynamic-memslots", "off" }, -- /* hw_compat_rhel_9_4 from hw_compat_8_1 */ -- { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, - }; - const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 54d1c58bce..c846673d87 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1031,6 +1031,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - "Use a different south bridge than PIIX3"); - - -+ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra, -+ hw_compat_rhel_9_4_extra_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_4, - hw_compat_rhel_9_4_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_3, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 
cd5fb7380e..02bc3d515f 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -731,6 +731,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) - m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.4.0"; -+ -+ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra, -+ hw_compat_rhel_9_4_extra_len); - } - - DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 24f4773179..cc6087c37a 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1277,6 +1277,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine) - - static void ccw_machine_rhel940_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len); - } - DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 4edfdb0ddb..e6ae0e61cf 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -505,6 +505,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_4_extra[]; -+extern const size_t hw_compat_rhel_9_4_extra_len; -+ - extern GlobalProperty hw_compat_rhel_9_4[]; - extern const size_t hw_compat_rhel_9_4_len; - --- -2.39.3 - diff --git a/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch new file mode 100644 index 0000000..65da2cc --- /dev/null +++ b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch @@ -0,0 +1,139 @@ +From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:03 -0500 +Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via + MachineState 
+ +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm) + +Add a new member "guest_memfd" to memory backends. When it's set +to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm +guest_memfd will be allocated during RAMBlock allocation. + +Memory backend's @guest_memfd is wired with @require_guest_memfd +field of MachineState. It avoid looking up the machine in phymem.c. + +MachineState::require_guest_memfd is supposed to be set by any VMs +that requires KVM guest memfd as private memory, e.g., TDX VM. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-8-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c) +Signed-off-by: Paolo Bonzini +--- + backends/hostmem-file.c | 1 + + backends/hostmem-memfd.c | 1 + + backends/hostmem-ram.c | 1 + + backends/hostmem.c | 1 + + hw/core/machine.c | 5 +++++ + include/hw/boards.h | 2 ++ + include/sysemu/hostmem.h | 1 + + 7 files changed, 12 insertions(+) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index ac3e433cbd..3c69db7946 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; + ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + ram_flags |= fb->is_pmem ? 
RAM_PMEM : 0; + ram_flags |= RAM_NAMED_FILE; + return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, +diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c +index 3923ea9364..745ead0034 100644 +--- a/backends/hostmem-memfd.c ++++ b/backends/hostmem-memfd.c +@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); + } +diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c +index d121249f0f..f7d81af783 100644 +--- a/backends/hostmem-ram.c ++++ b/backends/hostmem-ram.c +@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? 
RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), + name, backend->size, + ram_flags, errp); +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 81a72ce40b..eb9682b4a8 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj) + /* TODO: convert access to globals to compat properties */ + backend->merge = machine_mem_merge(machine); + backend->dump = machine_dump_guest_core(machine); ++ backend->guest_memfd = machine_require_guest_memfd(machine); + backend->reserve = true; + backend->prealloc_threads = machine->smp.cpus; + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 92609aae27..07b994e136 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine) + return machine->mem_merge; + } + ++bool machine_require_guest_memfd(MachineState *machine) ++{ ++ return machine->require_guest_memfd; ++} ++ + static char *cpu_slot_to_string(const CPUArchId *cpu) + { + GString *s = g_string_new(NULL); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index cca62f906b..815a1c4b26 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); + int machine_phandle_start(MachineState *machine); + bool machine_dump_guest_core(MachineState *machine); + bool machine_mem_merge(MachineState *machine); ++bool machine_require_guest_memfd(MachineState *machine); + HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); + void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, +@@ -372,6 +373,7 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; ++ bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h +index 0e411aaa29..04b884bf42 100644 +--- 
a/include/sysemu/hostmem.h ++++ b/include/sysemu/hostmem.h +@@ -74,6 +74,7 @@ struct HostMemoryBackend { + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; ++ bool guest_memfd; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); +-- +2.39.3 + diff --git a/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch b/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch deleted file mode 100644 index 31439d7..0000000 --- a/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 5c639f8ce65183ce8e44ee8e0230e9d627a440d7 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Wed, 21 Feb 2024 17:00:27 +0000 -Subject: [PATCH 05/20] Implement SMBIOS type 9 v2.6 - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [3/18] ead230527d93938907a561cf5b985ee4f54d82b1 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 -Author: Felix Wu - - Signed-off-by: Felix Wu - Signed-off-by: Nabih Estefan - Message-Id: <20240221170027.1027325-3-nabihestefan@google.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -(cherry picked from commit 04f143d828845d0fd52dd4a52664d81a4f5431f7) -Signed-off-by: Igor Mammedov ---- - hw/smbios/smbios.c | 49 +++++++++++++++++++++++++++++++++--- - include/hw/firmware/smbios.h | 4 +++ - qemu-options.hx | 2 +- - 3 files changed, 51 insertions(+), 4 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 4f5637d445..074705fa4c 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -124,7 +124,7 @@ static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); - - /* type 9 instance for parsing */ - struct type9_instance { -- const char *slot_designation; -+ const char *slot_designation, *pcidev; - uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, - slot_characteristics1, slot_characteristics2; - uint16_t slot_id; -@@ -427,6 +427,11 @@ static const QemuOptDesc qemu_smbios_type9_opts[] = { - .type = QEMU_OPT_NUMBER, - .help = "slot characteristics2, see the spec", - }, -+ { -+ .name = "pci_device", -+ .type = QEMU_OPT_STRING, -+ .help = "PCI device, if provided." -+ } - }; - - static const QemuOptDesc qemu_smbios_type11_opts[] = { -@@ -851,7 +856,7 @@ static void smbios_build_type_8_table(void) - } - } - --static void smbios_build_type_9_table(void) -+static void smbios_build_type_9_table(Error **errp) - { - unsigned instance = 0; - struct type9_instance *t9; -@@ -868,6 +873,43 @@ static void smbios_build_type_9_table(void) - t->slot_characteristics1 = t9->slot_characteristics1; - t->slot_characteristics2 = t9->slot_characteristics2; - -+ if (t9->pcidev) { -+ PCIDevice *pdev = NULL; -+ int rc = pci_qdev_find_device(t9->pcidev, &pdev); -+ if (rc != 0) { -+ error_setg(errp, -+ "No PCI device %s for SMBIOS type 9 entry %s", -+ t9->pcidev, t9->slot_designation); -+ return; -+ } -+ /* -+ * We only handle the case were the device is attached to -+ * the PCI root bus. 
The general case is more complex as -+ * bridges are enumerated later and the table would need -+ * to be updated at this moment. -+ */ -+ if (!pci_bus_is_root(pci_get_bus(pdev))) { -+ error_setg(errp, -+ "Cannot create type 9 entry for PCI device %s: " -+ "not attached to the root bus", -+ t9->pcidev); -+ return; -+ } -+ t->segment_group_number = cpu_to_le16(0); -+ t->bus_number = pci_dev_bus_num(pdev); -+ t->device_number = pdev->devfn; -+ } else { -+ /* -+ * Per SMBIOS spec, For slots that are not of the PCI, AGP, PCI-X, -+ * or PCI-Express type that do not have bus/device/function -+ * information, 0FFh should be populated in the fields of Segment -+ * Group Number, Bus Number, Device/Function Number. -+ */ -+ t->segment_group_number = 0xff; -+ t->bus_number = 0xff; -+ t->device_number = 0xff; -+ } -+ - SMBIOS_BUILD_TABLE_POST; - instance++; - } -@@ -1222,7 +1264,7 @@ void smbios_get_tables(MachineState *ms, - } - - smbios_build_type_8_table(); -- smbios_build_type_9_table(); -+ smbios_build_type_9_table(errp); - smbios_build_type_11_table(); - - #define MAX_DIMM_SZ (16 * GiB) -@@ -1568,6 +1610,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); - t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); - t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); -+ save_opt(&t->pcidev, opts, "pcidev"); - QTAILQ_INSERT_TAIL(&type9, t, next); - return; - } -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 6bbd5a4c20..f8dd07fe4c 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -222,6 +222,10 @@ struct smbios_type_9 { - uint16_t slot_id; - uint8_t slot_characteristics1; - uint8_t slot_characteristics2; -+ /* SMBIOS spec v2.6+ */ -+ uint16_t segment_group_number; -+ uint8_t bus_number; -+ uint8_t device_number; - } QEMU_PACKED; - - /* SMBIOS type 11 - OEM strings */ -diff --git 
a/qemu-options.hx b/qemu-options.hx -index 94cacc2c63..93364e1765 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2710,7 +2710,7 @@ SRST - ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` - Specify SMBIOS type 4 fields - --``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` -+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d][,pci_device=str]`` - Specify SMBIOS type 9 fields - - ``-smbios type=11[,value=str][,path=filename]`` --- -2.39.3 - diff --git a/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch b/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch deleted file mode 100644 index 89466c7..0000000 --- a/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 84fc607d678bd72397a41d706e91fa241fd97266 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Wed, 21 Feb 2024 17:00:26 +0000 -Subject: [PATCH 04/20] Implement base of SMBIOS type 9 descriptor. - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [2/18] 2678cc080bfbf3357fa2f94ceaf42fc61b690d32 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -commit: 735eee07d1f963635d3c3bf9f5e4bf1bc000870e -Author: Felix Wu - - Version 2.1+. - - Signed-off-by: Felix Wu - Signed-off-by: Nabih Estefan - Message-Id: <20240221170027.1027325-2-nabihestefan@google.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Igor Mammedov ---- - hw/smbios/smbios.c | 99 ++++++++++++++++++++++++++++++++++++ - include/hw/firmware/smbios.h | 13 +++++ - qemu-options.hx | 3 ++ - 3 files changed, 115 insertions(+) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7bde23e59d..4f5637d445 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -122,6 +122,16 @@ struct type8_instance { - }; - static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); - -+/* type 9 instance for parsing */ -+struct type9_instance { -+ const char *slot_designation; -+ uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, -+ slot_characteristics1, slot_characteristics2; -+ uint16_t slot_id; -+ QTAILQ_ENTRY(type9_instance) next; -+}; -+static QTAILQ_HEAD(, type9_instance) type9 = QTAILQ_HEAD_INITIALIZER(type9); -+ - static struct { - size_t nvalues; - char **values; -@@ -371,6 +381,54 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = { - }, - }; - -+static const QemuOptDesc qemu_smbios_type9_opts[] = { -+ { -+ .name = "type", -+ .type = QEMU_OPT_NUMBER, -+ .help = "SMBIOS element type", -+ }, -+ { -+ .name = "slot_designation", -+ .type = QEMU_OPT_STRING, -+ .help = "string number for reference designation", -+ }, -+ { -+ .name = "slot_type", -+ .type = QEMU_OPT_NUMBER, -+ .help = "connector type", -+ }, -+ { -+ .name = "slot_data_bus_width", -+ .type = QEMU_OPT_NUMBER, -+ .help = "port type", -+ }, -+ { -+ .name = "current_usage", -+ .type = QEMU_OPT_NUMBER, -+ .help = "current usage", -+ }, -+ { -+ .name = "slot_length", -+ .type = QEMU_OPT_NUMBER, -+ .help = "system slot length", -+ }, -+ { -+ .name = "slot_id", -+ .type = QEMU_OPT_NUMBER, -+ .help = "system slot id", -+ }, -+ { -+ .name = "slot_characteristics1", -+ .type = QEMU_OPT_NUMBER, -+ .help = "slot characteristics1, see the spec", -+ }, -+ { -+ .name = "slot_characteristics2", -+ .type = QEMU_OPT_NUMBER, -+ .help = "slot characteristics2, see the spec", -+ }, -+}; -+ - 
static const QemuOptDesc qemu_smbios_type11_opts[] = { - { - .name = "value", -@@ -594,6 +652,7 @@ bool smbios_skip_table(uint8_t type, bool required_table) - #define T2_BASE 0x200 - #define T3_BASE 0x300 - #define T4_BASE 0x400 -+#define T9_BASE 0x900 - #define T11_BASE 0xe00 - - #define T16_BASE 0x1000 -@@ -792,6 +851,28 @@ static void smbios_build_type_8_table(void) - } - } - -+static void smbios_build_type_9_table(void) -+{ -+ unsigned instance = 0; -+ struct type9_instance *t9; -+ -+ QTAILQ_FOREACH(t9, &type9, next) { -+ SMBIOS_BUILD_TABLE_PRE(9, T9_BASE + instance, true); -+ -+ SMBIOS_TABLE_SET_STR(9, slot_designation, t9->slot_designation); -+ t->slot_type = t9->slot_type; -+ t->slot_data_bus_width = t9->slot_data_bus_width; -+ t->current_usage = t9->current_usage; -+ t->slot_length = t9->slot_length; -+ t->slot_id = t9->slot_id; -+ t->slot_characteristics1 = t9->slot_characteristics1; -+ t->slot_characteristics2 = t9->slot_characteristics2; -+ -+ SMBIOS_BUILD_TABLE_POST; -+ instance++; -+ } -+} -+ - static void smbios_build_type_11_table(void) - { - char count_str[128]; -@@ -1141,6 +1222,7 @@ void smbios_get_tables(MachineState *ms, - } - - smbios_build_type_8_table(); -+ smbios_build_type_9_table(); - smbios_build_type_11_table(); - - #define MAX_DIMM_SZ (16 * GiB) -@@ -1472,6 +1554,23 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - t8_i->port_type = qemu_opt_get_number(opts, "port_type", 0); - QTAILQ_INSERT_TAIL(&type8, t8_i, next); - return; -+ case 9: { -+ if (!qemu_opts_validate(opts, qemu_smbios_type9_opts, errp)) { -+ return; -+ } -+ struct type9_instance *t; -+ t = g_new0(struct type9_instance, 1); -+ save_opt(&t->slot_designation, opts, "slot_designation"); -+ t->slot_type = qemu_opt_get_number(opts, "slot_type", 0); -+ t->slot_data_bus_width = qemu_opt_get_number(opts, "slot_data_bus_width", 0); -+ t->current_usage = qemu_opt_get_number(opts, "current_usage", 0); -+ t->slot_length = qemu_opt_get_number(opts, "slot_length", 0); -+ 
t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); -+ t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); -+ t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); -+ QTAILQ_INSERT_TAIL(&type9, t, next); -+ return; -+ } - case 11: - if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) { - return; -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index d24b3ccd32..6bbd5a4c20 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -211,6 +211,19 @@ struct smbios_type_8 { - uint8_t port_type; - } QEMU_PACKED; - -+/* SMBIOS type 9 - System Slots (v2.1+) */ -+struct smbios_type_9 { -+ struct smbios_structure_header header; -+ uint8_t slot_designation; -+ uint8_t slot_type; -+ uint8_t slot_data_bus_width; -+ uint8_t current_usage; -+ uint8_t slot_length; -+ uint16_t slot_id; -+ uint8_t slot_characteristics1; -+ uint8_t slot_characteristics2; -+} QEMU_PACKED; -+ - /* SMBIOS type 11 - OEM strings */ - struct smbios_type_11 { - struct smbios_structure_header header; -diff --git a/qemu-options.hx b/qemu-options.hx -index 0814f43066..94cacc2c63 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2710,6 +2710,9 @@ SRST - ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` - Specify SMBIOS type 4 fields - -+``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` -+ Specify SMBIOS type 9 fields -+ - ``-smbios type=11[,value=str][,path=filename]`` - Specify SMBIOS type 11 fields - --- -2.39.3 - diff --git a/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch new file mode 100644 index 0000000..aaedcf4 --- /dev/null +++ b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch @@ 
-0,0 +1,203 @@ +From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:33 -0400 +Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm) + +Board reset requires writing a fresh CPU state. As far as KVM is +concerned, the only thing that blocks reset is that CPU state is +encrypted; therefore, kvm_cpus_are_resettable() can simply check +if that is the case. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-accel-ops.c | 2 +- + accel/kvm/kvm-all.c | 5 ----- + include/sysemu/kvm.h | 10 ---------- + target/arm/kvm.c | 5 ----- + target/i386/kvm/kvm.c | 5 ----- + target/loongarch/kvm/kvm.c | 5 ----- + target/mips/kvm.c | 5 ----- + target/ppc/kvm.c | 5 ----- + target/riscv/kvm/kvm-cpu.c | 5 ----- + target/s390x/kvm/kvm.c | 5 ----- + 10 files changed, 1 insertion(+), 51 deletions(-) + +diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c +index b3c946dc4b..74e3c5785b 100644 +--- a/accel/kvm/kvm-accel-ops.c ++++ b/accel/kvm/kvm-accel-ops.c +@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) + + static bool kvm_cpus_are_resettable(void) + { +- return !kvm_enabled() || kvm_cpu_check_are_resettable(); ++ return !kvm_enabled() || !kvm_state->guest_state_protected; + } + + #ifdef KVM_CAP_SET_GUEST_DEBUG +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ec0f6df7c5..b51e09a583 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2696,11 +2696,6 @@ void 
kvm_flush_coalesced_mmio_buffer(void) + s->coalesced_flush_in_progress = false; + } + +-bool kvm_cpu_check_are_resettable(void) +-{ +- return kvm_arch_cpu_check_are_resettable(); +-} +- + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 302e8f6f1e..54f4d83a37 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + /* Notify resamplefd for EOI of specific interrupts. */ + void kvm_resample_fd_notify(int gsi); + +-/** +- * kvm_cpu_check_are_resettable - return whether CPUs can be reset +- * +- * Returns: true: CPUs are resettable +- * false: CPUs are not resettable +- */ +-bool kvm_cpu_check_are_resettable(void); +- +-bool kvm_arch_cpu_check_are_resettable(void); +- + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ab85d628a8..21ebbf3b8f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) + return (data - 32) & 0xffff; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e271652620..a12207a8ee 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void) + return has_msr_umwait; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return !sev_es_enabled(); +-} +- + #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + + void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d630cc39cb..8224d94333 100644 +--- 
a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) + return true; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +diff --git a/target/mips/kvm.c b/target/mips/kvm.c +index 6c52e59f55..a631ab544f 100644 +--- a/target/mips/kvm.c ++++ b/target/mips/kvm.c +@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) + return -1; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index 59f640cf7b..9d9d9f0d79 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c +index 6a6c6cae80..49d2f3ad58 100644 +--- a/target/riscv/kvm/kvm-cpu.c ++++ b/target/riscv/kvm/kvm-cpu.c +@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static int aia_mode; + + static const char *kvm_aia_mode_str(uint64_t mode) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 55fb4855b1..4db59658e1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) + kvm_s390_vcpu_interrupt(cpu, &irq); + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_s390_get_zpci_op(void) + { + return cap_zpci_op; +-- +2.39.3 + diff --git a/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch 
b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch new file mode 100644 index 0000000..7cdab60 --- /dev/null +++ b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch @@ -0,0 +1,127 @@ +From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:10 -0400 +Subject: [PATCH 019/100] KVM: track whether guest state is encrypted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm) + +So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the +guest state is encrypted, in which case they do nothing. For the new +API using VM types, instead, the ioctls will fail which is a safer and +more robust approach. + +The new API will be the only one available for SEV-SNP and TDX, but it +is also usable for SEV and SEV-ES. In preparation for that, require +architecture-specific KVM code to communicate the point at which guest +state is protected (which must be after kvm_cpu_synchronize_post_init(), +though that might change in the future in order to support migration). +From that point, skip reading registers so that cpu->vcpu_dirty is +never true: if it ever becomes true, kvm_arch_put_registers() will +fail miserably. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 17 ++++++++++++++--- + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/i386/sev.c | 1 + + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 931f74256e..ec0f6df7c5 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void) + + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + int ret = kvm_arch_get_registers(cpu); + if (ret) { + error_report("Failed to get registers: %s", strerror(-ret)); +@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_state(CPUState *cpu) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); + } + } +@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_post_init(CPUState *cpu) + { +- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ if (!kvm_state->guest_state_protected) { ++ /* ++ * This runs before the machine_init_done notifiers, and is the last ++ * opportunity to synchronize the state of confidential guests. 
++ */ ++ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ } + } + + static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + query_stats_schema_vcpu(first_cpu, &stats_args); + } + } ++ ++void kvm_mark_guest_state_protected(void) ++{ ++ kvm_state->guest_state_protected = true; ++} +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index fad9a7e8ff..302e8f6f1e 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); + ++void kvm_mark_guest_state_protected(void); ++ + /** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 882e37e12c..3496be7997 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -87,6 +87,7 @@ struct KVMState + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; ++ bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b8f79d34d1..c49a8fd55e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + if (ret) { + exit(1); + } ++ kvm_mark_guest_state_protected(); + } + + /* query the measurement blob length */ +-- +2.39.3 + diff --git a/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch new file mode 100644 index 0000000..8e47872 --- /dev/null +++ b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch @@ -0,0 +1,329 @@ +From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:02 -0500 +Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm) + +Add KVM guest_memfd support to RAMBlock so both normal hva based memory +and kvm guest memfd based private memory can be associated in one RAMBlock. + +Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to +create private guest_memfd during RAMBlock setup. + +Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd +is more flexible and extensible than simply relying on the VM type because +in the future we may have the case that not all the memory of a VM needs +guest memfd. As a benefit, it also avoids getting MachineState in memory +subsystem. + +Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of +confidential guests, such as TDX VM. How and when to set it for memory +backends will be implemented in the following patches. 
+ +Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has +KVM guest_memfd allocated. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-7-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/exec/memory.h | 20 +++++++++++++++++--- + include/exec/ram_addr.h | 2 +- + include/exec/ramblock.h | 1 + + include/sysemu/kvm.h | 2 ++ + system/memory.c | 5 +++++ + system/physmem.c | 34 +++++++++++++++++++++++++++++++--- + 8 files changed, 90 insertions(+), 7 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 272e945f52..a7b9a127dd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; + static uint64_t kvm_supported_memory_attributes; ++static bool kvm_guest_memfd_supported; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms) + } + + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); ++ kvm_guest_memfd_supported = ++ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && ++ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && ++ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); ++ + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void) + { + kvm_state->guest_state_protected = true; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ int fd; ++ struct kvm_create_guest_memfd guest_memfd = { ++ .size = size, ++ .flags = flags, ++ }; ++ ++ 
if (!kvm_guest_memfd_supported) { ++ error_setg(errp, "KVM does not support guest_memfd"); ++ return -1; ++ } ++ ++ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); ++ return -1; ++ } ++ ++ return fd; ++} +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index ca38172884..8e0eb22e61 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) + { + return false; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ return -ENOSYS; ++} +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 8626a355b3..679a847685 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + ++/* RAM can be private that has kvm guest memfd backend */ ++#define RAM_GUEST_MEMFD (1 << 12) ++ + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, +@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. +- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. ++ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, ++ * RAM_GUEST_MEMFD. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the +@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, + * (getpagesize()) will be used. + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @path: the path in which to allocate the RAM. + * @offset: offset within the file referenced by path + * @errp: pointer to Error*, to store an error if it happens. +@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, + * @size: size of the region. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @fd: the fd to mmap. + * @offset: offset within the file referenced by fd + * @errp: pointer to Error*, to store an error if it happens. +@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) + */ + bool memory_region_is_protected(MemoryRegion *mr); + ++/** ++ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd ++ * associated ++ * ++ * Returns %true if a memory region's ram_block has valid guest_memfd assigned. ++ * ++ * @mr: the memory region being queried ++ */ ++bool memory_region_has_guest_memfd(MemoryRegion *mr); ++ + /** + * memory_region_get_iommu: check whether a memory region is an iommu + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index de45ba7bc9..07c8f86375 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); + * @mr: the memory region where the ram block is + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @mem_path or @fd: specify the backing file or device + * @offset: Offset into target file + * @errp: pointer to Error*, to store an error if it happens +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 848915ea5b..459c8917de 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -41,6 +41,7 @@ struct RAMBlock { + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + int fd; + uint64_t fd_offset; ++ int guest_memfd; + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index f114ff6986..9e4ab7ae89 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); + */ + bool kvm_hwpoisoned_mem(void); + ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); ++ + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +diff --git a/system/memory.c b/system/memory.c +index a229a79988..c756950c0c 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) + return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); + } + ++bool memory_region_has_guest_memfd(MemoryRegion *mr) ++{ ++ return mr->ram_block && mr->ram_block->guest_memfd >= 0; ++} ++ + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +diff --git a/system/physmem.c b/system/physmem.c +index a4fe3d2bf8..f5dfa20e57 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = 
NULL; ++ bool free_on_error = false; + ram_addr_t old_ram_size, new_ram_size; + Error *err = NULL; + +@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + return; + } + memory_try_enable_merging(new_block->host, new_block->max_length); ++ free_on_error = true; ++ } ++ } ++ ++ if (new_block->flags & RAM_GUEST_MEMFD) { ++ assert(kvm_enabled()); ++ assert(new_block->guest_memfd < 0); ++ ++ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, ++ 0, errp); ++ if (new_block->guest_memfd < 0) { ++ qemu_mutex_unlock_ramlist(); ++ goto out_free; + } + } + +@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); + } ++ return; ++ ++out_free: ++ if (free_on_error) { ++ qemu_anon_ram_free(new_block->host, new_block->max_length); ++ new_block->host = NULL; ++ } + } + + #ifdef CONFIG_POSIX +@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + /* Just support these ram flags by now. 
*/ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | +- RAM_READONLY_FD)) == 0); ++ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); +@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + new_block->used_length = size; + new_block->max_length = size; + new_block->flags = ram_flags; ++ new_block->guest_memfd = -1; + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); + if (!new_block->host) { +@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + int align; + + assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | +- RAM_NORESERVE)) == 0); ++ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + assert(!host ^ (ram_flags & RAM_PREALLOC)); + + align = qemu_real_host_page_size(); +@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + new_block->max_length = max_size; + assert(max_size >= size); + new_block->fd = -1; ++ new_block->guest_memfd = -1; + new_block->page_size = qemu_real_host_page_size(); + new_block->host = host; + new_block->flags = ram_flags; +@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) + { +- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); ++ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); + } + +@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) + } else { + qemu_anon_ram_free(block->host, block->max_length); + } ++ ++ if (block->guest_memfd >= 0) { ++ close(block->guest_memfd); ++ } ++ + g_free(block); + } + +-- +2.39.3 + diff --git 
a/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch new file mode 100644 index 0000000..04a5fbf --- /dev/null +++ b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch @@ -0,0 +1,82 @@ +From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 20 Mar 2024 17:45:29 +0100 +Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated + discard + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm) + +Some subsystems like VFIO might disable ram block discard, but guest_memfd +uses discard operations to implement conversions between private and +shared memory. Because of this, sequences like the following can result +in stale IOMMU mappings: + +1. allocate shared page +2. convert page shared->private +3. discard shared page +4. convert page private->shared +5. allocate shared page +6. issue DMA operations against that shared page + +This is not a use-after-free, because after step 3 VFIO is still pinning +the page. However, DMA operations in step 6 will hit the old mapping +that was allocated in step 1. + +Address this by taking ram_block_discard_is_enabled() into account when +deciding whether or not to discard pages. + +Since kvm_convert_memory()/guest_memfd doesn't implement a +RamDiscardManager handler to convey and replay discard operations, +this is a case of uncoordinated discard, which is blocked/released +by ram_block_discard_require(). Interestingly, this function had +no use so far. 
+ +Alternative approaches would be to block discard of shared pages, but +this would cause guests to consume twice the memory if they use VFIO; +or to implement a RamDiscardManager and only block uncoordinated +discard, i.e. use ram_block_coordinated_discard_require(). + +[Commit message mostly by Michael Roth ] + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f) +Signed-off-by: Paolo Bonzini +--- + system/physmem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/system/physmem.c b/system/physmem.c +index f5dfa20e57..5ebcf5be11 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + ++ if (ram_block_discard_require(true) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot set up private guest memory: discard currently blocked"); ++ error_append_hint(errp, "Are you using assigned devices?\n"); ++ goto out_free; ++ } ++ + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { +@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block) + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); ++ ram_block_discard_require(false); + } + + g_free(block); +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch b/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch deleted file mode 100644 index bc0f15a..0000000 --- a/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 18 Mar 2024 13:03:19 +0000 -Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending - too much data to the backend" -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs -RH-Jira: RHEL-24614 -RH-Acked-by: Thomas Huth -RH-Acked-by: Marc-André Lureau -RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu) - -This commit results in unexpected termination of the TLS connection. -When 'fd_can_read' returns 0, the code goes on to pass a zero length -buffer to qio_channel_read. The TLS impl calls into gnutls_recv() -with this zero length buffer, at which point GNUTLS returns an error -GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code -resulting in the connection being torn down by the chardev. - -Simply skipping the qio_channel_read when the buffer length is zero -is also not satisfactory, as it results in a high CPU burn busy loop -massively slowing QEMU's functionality. - -The proper solution is to avoid tcp_chr_read being called at all -unless the frontend is able to accept more data. This will be done -in a followup commit. - -This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 - -Reviewed-by: Thomas Huth -Signed-off-by: Daniel P. 
Berrangé -(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864) ---- - chardev/char-socket.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index f48d341ebc..51d0943fce 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) - s->max_size <= 0) { - return TRUE; - } -- len = tcp_chr_read_poll(opaque); -- if (len > sizeof(buf)) { -- len = sizeof(buf); -+ len = sizeof(buf); -+ if (len > s->max_size) { -+ len = s->max_size; - } - size = tcp_chr_recv(chr, (void *)buf, len); - if (size == 0 || (size == -1 && errno != EAGAIN)) { --- -2.39.3 - diff --git a/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch b/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch deleted file mode 100644 index 135afbe..0000000 --- a/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch +++ /dev/null @@ -1,216 +0,0 @@ -From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 18 Mar 2024 17:08:30 +0000 -Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs -RH-Jira: RHEL-24614 -RH-Acked-by: Thomas Huth -RH-Acked-by: Marc-André Lureau -RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu) - -This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90, -and add comments to explain why child sources cannot be used. - -When a GSource is added as a child of another GSource, if its -'prepare' function indicates readiness, then the parent's -'prepare' function will never be run. 
The io_watch_poll_prepare -absolutely *must* be run on every iteration of the main loop, -to ensure that the chardev backend doesn't feed data to the -frontend that it is unable to consume. - -At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made, -all the child GSource impls were relying on poll'ing an FD, -so their 'prepare' functions would never indicate readiness -ahead of poll() being invoked. So the buggy behaviour was -not noticed and lay dormant. - -Relatively recently the QIOChannelTLS impl introduced a -level 2 child GSource, which checks with GNUTLS whether it -has cached any data that was decoded but not yet consumed: - - commit ffda5db65aef42266a5053a4be34515106c4c7ee - Author: Antoine Damhet - Date: Tue Nov 15 15:23:29 2022 +0100 - - io/channel-tls: fix handling of bigger read buffers - - Since the TLS backend can read more data from the underlying QIOChannel - we introduce a minimal child GSource to notify if we still have more - data available to be read. - - Signed-off-by: Antoine Damhet - Signed-off-by: Charles Frey - Signed-off-by: Daniel P. Berrangé - -With this, it is now quite common for the 'prepare' function -on a QIOChannelTLS GSource to indicate immediate readiness, -bypassing the parent GSource 'prepare' function. IOW, the -critical 'io_watch_poll_prepare' is being skipped on some -iterations of the main loop. As a result chardev frontend -asserts are now being triggered as they are fed data they -are not ready to consume. 
- -A reproducer is as follows: - - * In terminal 1 run a GNUTLS *echo* server - - $ gnutls-serv --echo \ - --x509cafile ca-cert.pem \ - --x509keyfile server-key.pem \ - --x509certfile server-cert.pem \ - -p 9000 - - * In terminal 2 run a QEMU guest - - $ qemu-system-s390x \ - -nodefaults \ - -display none \ - -object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \ - -chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \ - -device sclpconsole,chardev=con0 \ - -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 - -After the previous patch revert, but before this patch revert, -this scenario will crash: - - qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion - `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. - -This assert indicates that 'tcp_chr_read' was called without -'tcp_chr_read_poll' having first been checked for ability to -receive more data - -QEMU's use of a 'prepare' function to create/delete another -GSource is rather a hack and not normally the kind of thing that -is expected to be done by a GSource. There is no mechanism to -force GLib to always run the 'prepare' function of a parent -GSource. The best option is to simply not use the child source -concept, and go back to the functional approach previously -relied on. - -Reviewed-by: Marc-André Lureau -Reviewed-by: Thomas Huth -Tested-by: Thomas Huth -Signed-off-by: Daniel P. 
Berrangé -(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de) ---- - chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 51 insertions(+), 5 deletions(-) - -diff --git a/chardev/char-io.c b/chardev/char-io.c -index 4451128cba..dab77b112e 100644 ---- a/chardev/char-io.c -+++ b/chardev/char-io.c -@@ -33,6 +33,7 @@ typedef struct IOWatchPoll { - IOCanReadHandler *fd_can_read; - GSourceFunc fd_read; - void *opaque; -+ GMainContext *context; - } IOWatchPoll; - - static IOWatchPoll *io_watch_poll_from_source(GSource *source) -@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source, - return FALSE; - } - -+ /* -+ * We do not register the QIOChannel watch as a child GSource. -+ * The 'prepare' function on the parent GSource will be -+ * skipped if a child GSource's 'prepare' function indicates -+ * readiness. We need this prepare function be guaranteed -+ * to run on *every* iteration of the main loop, because -+ * it is critical to ensure we remove the QIOChannel watch -+ * if 'fd_can_read' indicates the frontend cannot receive -+ * more data. 
-+ */ - if (now_active) { - iwp->src = qio_channel_create_watch( - iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); - g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); -- g_source_add_child_source(source, iwp->src); -- g_source_unref(iwp->src); -+ g_source_attach(iwp->src, iwp->context); - } else { -- g_source_remove_child_source(source, iwp->src); -+ g_source_destroy(iwp->src); -+ g_source_unref(iwp->src); - iwp->src = NULL; - } - return FALSE; - } - -+static gboolean io_watch_poll_check(GSource *source) -+{ -+ return FALSE; -+} -+ - static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback, - gpointer user_data) - { -- return G_SOURCE_CONTINUE; -+ abort(); -+} -+ -+static void io_watch_poll_finalize(GSource *source) -+{ -+ /* -+ * Due to a glib bug, removing the last reference to a source -+ * inside a finalize callback causes recursive locking (and a -+ * deadlock). This is not a problem inside other callbacks, -+ * including dispatch callbacks, so we call io_remove_watch_poll -+ * to remove this source. At this point, iwp->src must -+ * be NULL, or we would leak it. 
-+ */ -+ IOWatchPoll *iwp = io_watch_poll_from_source(source); -+ assert(iwp->src == NULL); - } - - static GSourceFuncs io_watch_poll_funcs = { - .prepare = io_watch_poll_prepare, -+ .check = io_watch_poll_check, - .dispatch = io_watch_poll_dispatch, -+ .finalize = io_watch_poll_finalize, - }; - - GSource *io_add_watch_poll(Chardev *chr, -@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr, - iwp->ioc = ioc; - iwp->fd_read = (GSourceFunc) fd_read; - iwp->src = NULL; -+ iwp->context = context; - - name = g_strdup_printf("chardev-iowatch-%s", chr->label); - g_source_set_name((GSource *)iwp, name); -@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr, - return (GSource *)iwp; - } - -+static void io_remove_watch_poll(GSource *source) -+{ -+ IOWatchPoll *iwp; -+ -+ iwp = io_watch_poll_from_source(source); -+ if (iwp->src) { -+ g_source_destroy(iwp->src); -+ g_source_unref(iwp->src); -+ iwp->src = NULL; -+ } -+ g_source_destroy(&iwp->parent); -+} -+ - void remove_fd_in_watch(Chardev *chr) - { - if (chr->gsource) { -- g_source_destroy(chr->gsource); -+ io_remove_watch_poll(chr->gsource); - chr->gsource = NULL; - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch new file mode 100644 index 0000000..32e792e --- /dev/null +++ b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch @@ -0,0 +1,67 @@ +From d4e6f7105b00ba2536d5d733b7c03116f28ce116 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:21 -0400 +Subject: [PATCH 2/5] Revert "monitor: use aio_co_reschedule_self()" + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] b6a2ebd4a69dbcd2bd56c61e7c747f8f8f42337e (kmwolf/centos-qemu-kvm) + +Commit 1f25c172f837 ("monitor: use aio_co_reschedule_self()") was a code +cleanup 
that uses aio_co_reschedule_self() instead of open coding +coroutine rescheduling. + +Bug RHEL-34618 was reported and Kevin Wolf identified +the root cause. I missed that aio_co_reschedule_self() -> +qemu_get_current_aio_context() only knows about +qemu_aio_context/IOThread AioContexts and not about iohandler_ctx. It +does not function correctly when going back from the iohandler_ctx to +qemu_aio_context. + +Go back to open coding the AioContext transitions to avoid this bug. + +This reverts commit 1f25c172f83704e350c0829438d832384084a74d. + +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-34618 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 719c6819ed9a9838520fa732f9861918dc693bda) +Signed-off-by: Kevin Wolf +--- + qapi/qmp-dispatch.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index f3488afeef..176b549473 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -212,7 +212,8 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * executing the command handler so that it can make progress if it + * involves an AIO_WAIT_WHILE(). + */ +- aio_co_reschedule_self(qemu_get_aio_context()); ++ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + + monitor_set_cur(qemu_coroutine_self(), cur_mon); +@@ -226,7 +227,9 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * Move back to iohandler_ctx so that nested event loops for + * qemu_aio_context don't start new monitor commands. 
+ */ +- aio_co_reschedule_self(iohandler_get_aio_context()); ++ aio_co_schedule(iohandler_get_aio_context(), ++ qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + } else { + /* +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..96756df --- /dev/null +++ b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,38 @@ +From bcbc897cb19b3a6523de611f48f6bac6cea16c97 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 2 May 2024 13:17:03 +0200 +Subject: [PATCH 2/2] Revert "x86: rhel 9.4.0 machine type compat fix" + +RH-Author: Sebastian Ott +RH-MergeRequest: 237: Revert "x86: rhel 9.4.0 machine type compat fix" +RH-Jira: RHEL-30362 +RH-Acked-by: Ani Sinha +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 858ec153e65e96c39ca4db17ed93fd58c77dc2eb (seott1/cos-qemu-kvm) + +This reverts commit c46e44f0f4e861fe412ce679b0b0204881c1c2f5. + +pc-q35-rhel9.4.0 and newer should stay with SMBIOS_ENTRY_POINT_TYPE_AUTO. 
+ +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2f11f9af7d..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,9 +734,6 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + +- /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ +- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; +- + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch deleted file mode 100644 index f30b81f..0000000 --- a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:01 -0500 -Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release() - a no-op - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm) - -aio_context_acquire()/aio_context_release() has been replaced by -fine-grained locking to protect state shared by multiple threads. The -AioContext lock still plays the role of balancing locking in -AIO_WAIT_WHILE() and many functions in QEMU either require that the -AioContext lock is held or not held for this reason. In other words, the -AioContext lock is purely there for consistency with itself and serves -no real purpose anymore. 
- -Stop actually acquiring/releasing the lock in -aio_context_acquire()/aio_context_release() so that subsequent patches -can remove callers across the codebase incrementally. - -I have performed "make check" and qemu-iotests stress tests across -x86-64, ppc64le, and aarch64 to confirm that there are no failures as a -result of eliminating the lock. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Acked-by: Kevin Wolf -Message-ID: <20231205182011.1976568-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - util/async.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 8f90ddc304..04ee83d220 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx) - - void aio_context_acquire(AioContext *ctx) - { -- qemu_rec_mutex_lock(&ctx->lock); -+ /* TODO remove this function */ - } - - void aio_context_release(AioContext *ctx) - { -- qemu_rec_mutex_unlock(&ctx->lock); -+ /* TODO remove this function */ - } - - QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch deleted file mode 100644 index a64e246..0000000 --- a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:07 -0500 -Subject: [PATCH 090/101] aio: remove - aio_context_acquire()/aio_context_release() API - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm) - -Delete these functions because nothing calls these functions anymore. 
- -I introduced these APIs in commit 98563fc3ec44 ("aio: add -aio_context_acquire() and aio_context_release()") in 2014. It's with a -sigh of relief that I delete these APIs almost 10 years later. - -Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an -understanding of where the code needed to go in order to remove the -limitations that the original dataplane and the IOThread/AioContext -approach that followed it. - -Emanuele Giuseppe Esposito had the splendid determination to convert -large parts of the codebase so that they no longer needed the AioContext -lock. This was a painstaking process, both in the actual code changes -required and the iterations of code review that Emanuele eked out of -Kevin and me over many months. - -Kevin Wolf tackled multitudes of graph locking conversions to protect -in-flight I/O from run-time changes to the block graph as well as the -clang Thread Safety Analysis annotations that allow the compiler to -check whether the graph lock is being used correctly. - -And me, well, I'm just here to add some pizzazz to the QEMU multi-queue -block layer :). Thank you to everyone who helped with this effort, -including Eric Blake, code reviewer extraordinaire, and others who I've -forgotten to mention. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-11-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/block/aio.h | 17 ----------------- - util/async.c | 10 ---------- - 2 files changed, 27 deletions(-) - -diff --git a/include/block/aio.h b/include/block/aio.h -index f08b358077..af05512a7d 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx); - */ - void aio_context_unref(AioContext *ctx); - --/* Take ownership of the AioContext. 
If the AioContext will be shared between -- * threads, and a thread does not want to be interrupted, it will have to -- * take ownership around calls to aio_poll(). Otherwise, aio_poll() -- * automatically takes care of calling aio_context_acquire and -- * aio_context_release. -- * -- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A -- * thread still has to call those to avoid being interrupted by the guest. -- * -- * Bottom halves, timers and callbacks can be created or removed without -- * acquiring the AioContext. -- */ --void aio_context_acquire(AioContext *ctx); -- --/* Relinquish ownership of the AioContext. */ --void aio_context_release(AioContext *ctx); -- - /** - * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will - * run only once and as soon as possible. -diff --git a/util/async.c b/util/async.c -index dfd44ef612..460529057c 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx) - g_source_unref(&ctx->source); - } - --void aio_context_acquire(AioContext *ctx) --{ -- /* TODO remove this function */ --} -- --void aio_context_release(AioContext *ctx) --{ -- /* TODO remove this function */ --} -- - QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) - - AioContext *qemu_get_current_aio_context(void) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch deleted file mode 100644 index 7f95b67..0000000 --- a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch +++ /dev/null @@ -1,81 +0,0 @@ -From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:06 -0500 -Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE() - and AIO_WAIT_WHILE_UNLOCKED() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm) - -Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and -AIO_WAIT_WHILE_UNLOCKED() are equivalent. - -A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-10-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/block/aio-wait.h | 16 ++++------------ - 1 file changed, 4 insertions(+), 12 deletions(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index 5449b6d742..157f105916 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -63,9 +63,6 @@ extern AioWait global_aio_wait; - * @ctx: the aio context, or NULL if multiple aio contexts (for which the - * caller does not hold a lock) are involved in the polling condition. - * @cond: wait while this conditional expression is true -- * @unlock: whether to unlock and then lock again @ctx. This applies -- * only when waiting for another AioContext from the main loop. -- * Otherwise it's ignored. - * - * Wait while a condition is true. Use this to implement synchronous - * operations that require event loop activity. -@@ -78,7 +75,7 @@ extern AioWait global_aio_wait; - * wait on conditions between two IOThreads since that could lead to deadlock, - * go via the main loop instead. 
- */ --#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \ -+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \ - bool waited_ = false; \ - AioWait *wait_ = &global_aio_wait; \ - AioContext *ctx_ = (ctx); \ -@@ -95,13 +92,7 @@ extern AioWait global_aio_wait; - assert(qemu_get_current_aio_context() == \ - qemu_get_aio_context()); \ - while ((cond)) { \ -- if (unlock && ctx_) { \ -- aio_context_release(ctx_); \ -- } \ - aio_poll(qemu_get_aio_context(), true); \ -- if (unlock && ctx_) { \ -- aio_context_acquire(ctx_); \ -- } \ - waited_ = true; \ - } \ - } \ -@@ -109,10 +100,11 @@ extern AioWait global_aio_wait; - waited_; }) - - #define AIO_WAIT_WHILE(ctx, cond) \ -- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true) -+ AIO_WAIT_WHILE_INTERNAL(ctx, cond) - -+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */ - #define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \ -- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false) -+ AIO_WAIT_WHILE_INTERNAL(ctx, cond) - - /** - * aio_wait_kick: --- -2.39.3 - diff --git a/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch new file mode 100644 index 0000000..a0d9d31 --- /dev/null +++ b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch @@ -0,0 +1,64 @@ +From 0e3934e89ad1dda21681f64ff38da69b07d1b531 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:22 -0400 +Subject: [PATCH 3/5] aio: warn about iohandler_ctx special casing + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] cc316d70b2c187ee0412d6560ca1a03e381a69c1 (kmwolf/centos-qemu-kvm) + +The main loop has two AioContexts: qemu_aio_context and iohandler_ctx. +The main loop runs them both, but nested aio_poll() calls on +qemu_aio_context exclude iohandler_ctx. 
+ +Which one should qemu_get_current_aio_context() return when called from +the main loop? Document that it's always qemu_aio_context. + +This has subtle effects on functions that use +qemu_get_current_aio_context(). For example, aio_co_reschedule_self() +does not work when moving from iohandler_ctx to qemu_aio_context because +qemu_get_current_aio_context() does not differentiate these two +AioContexts. + +Document this in order to reduce the chance of future bugs. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit e669e800fc9ef8806af5c5578249ab758a4f8a5a) +Signed-off-by: Kevin Wolf +--- + include/block/aio.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/block/aio.h b/include/block/aio.h +index 8378553eb9..4ee81936ed 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -629,6 +629,9 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co); + * + * Move the currently running coroutine to new_ctx. If the coroutine is already + * running in new_ctx, do nothing. ++ * ++ * Note that this function cannot reschedule from iohandler_ctx to ++ * qemu_aio_context. + */ + void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx); + +@@ -661,6 +664,9 @@ void aio_co_enter(AioContext *ctx, Coroutine *co); + * If called from an IOThread this will be the IOThread's AioContext. If + * called from the main thread or with the "big QEMU lock" taken it + * will be the main loop AioContext. ++ * ++ * Note that the return value is never the main loop's iohandler_ctx and the ++ * return value is the main loop AioContext instead. 
+ */ + AioContext *qemu_get_current_aio_context(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch deleted file mode 100644 index 898e35b..0000000 --- a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch +++ /dev/null @@ -1,476 +0,0 @@ -From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 21 Nov 2023 16:44:00 +0800 -Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm) - -Introduce an iommufd object which allows the interaction -with the host /dev/iommu device. - -The /dev/iommu can have been already pre-opened outside of qemu, -in which case the fd can be passed directly along with the -iommufd object: - -This allows the iommufd object to be shared accross several -subsystems (VFIO, VDPA, ...). For example, libvirt would open -the /dev/iommu once. - -If no fd is passed along with the iommufd object, the /dev/iommu -is opened by the qemu code. 
- -Suggested-by: Alex Williamson -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 8 ++ - backends/Kconfig | 4 + - backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++ - backends/meson.build | 1 + - backends/trace-events | 10 ++ - include/sysemu/iommufd.h | 38 ++++++ - qapi/qom.json | 19 +++ - qemu-options.hx | 12 ++ - 8 files changed, 337 insertions(+) - create mode 100644 backends/iommufd.c - create mode 100644 include/sysemu/iommufd.h - -diff --git a/MAINTAINERS b/MAINTAINERS -index 695e0bd34f..a5a446914a 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c - F: docs/system/s390x/vfio-ap.rst - L: qemu-s390x@nongnu.org - -+iommufd -+M: Yi Liu -+M: Eric Auger -+M: Zhenzhong Duan -+S: Supported -+F: backends/iommufd.c -+F: include/sysemu/iommufd.h -+ - vhost - M: Michael S. Tsirkin - S: Supported -diff --git a/backends/Kconfig b/backends/Kconfig -index f35abc1609..2cb23f62fa 100644 ---- a/backends/Kconfig -+++ b/backends/Kconfig -@@ -1 +1,5 @@ - source tpm/Kconfig -+ -+config IOMMUFD -+ bool -+ depends on VFIO -diff --git a/backends/iommufd.c b/backends/iommufd.c -new file mode 100644 -index 0000000000..ba58a0eb0d ---- /dev/null -+++ b/backends/iommufd.c -@@ -0,0 +1,245 @@ -+/* -+ * iommufd container backend -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "sysemu/iommufd.h" -+#include "qapi/error.h" -+#include "qapi/qmp/qerror.h" -+#include "qemu/module.h" -+#include "qom/object_interfaces.h" -+#include "qemu/error-report.h" -+#include "monitor/monitor.h" -+#include "trace.h" -+#include -+#include -+ -+static void iommufd_backend_init(Object *obj) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ -+ be->fd = -1; -+ be->users = 0; -+ be->owned = true; -+ qemu_mutex_init(&be->lock); -+} -+ -+static void iommufd_backend_finalize(Object *obj) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ -+ if (be->owned) { -+ close(be->fd); -+ be->fd = -1; -+ } -+} -+ -+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ int fd = -1; -+ -+ fd = monitor_fd_param(monitor_cur(), str, errp); -+ if (fd == -1) { -+ error_prepend(errp, "Could not parse remote object fd %s:", str); -+ return; -+ } -+ qemu_mutex_lock(&be->lock); -+ be->fd = fd; -+ be->owned = false; -+ qemu_mutex_unlock(&be->lock); -+ trace_iommu_backend_set_fd(be->fd); -+} -+ -+static bool iommufd_backend_can_be_deleted(UserCreatable *uc) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); -+ -+ return !be->users; -+} -+ -+static void iommufd_backend_class_init(ObjectClass *oc, void *data) -+{ -+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); -+ -+ ucc->can_be_deleted = iommufd_backend_can_be_deleted; -+ -+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); -+} -+ -+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) -+{ -+ int fd, ret = 0; -+ -+ qemu_mutex_lock(&be->lock); -+ if (be->users == UINT32_MAX) { -+ error_setg(errp, "too many connections"); -+ ret = -E2BIG; -+ goto out; -+ } -+ if (be->owned && !be->users) { -+ fd = qemu_open_old("/dev/iommu", O_RDWR); -+ if (fd < 0) { -+ error_setg_errno(errp, errno, 
"/dev/iommu opening failed"); -+ ret = fd; -+ goto out; -+ } -+ be->fd = fd; -+ } -+ be->users++; -+out: -+ trace_iommufd_backend_connect(be->fd, be->owned, -+ be->users, ret); -+ qemu_mutex_unlock(&be->lock); -+ return ret; -+} -+ -+void iommufd_backend_disconnect(IOMMUFDBackend *be) -+{ -+ qemu_mutex_lock(&be->lock); -+ if (!be->users) { -+ goto out; -+ } -+ be->users--; -+ if (!be->users && be->owned) { -+ close(be->fd); -+ be->fd = -1; -+ } -+out: -+ trace_iommufd_backend_disconnect(be->fd, be->users); -+ qemu_mutex_unlock(&be->lock); -+} -+ -+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -+ Error **errp) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_alloc alloc_data = { -+ .size = sizeof(alloc_data), -+ .flags = 0, -+ }; -+ -+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); -+ if (ret) { -+ error_setg_errno(errp, errno, "Failed to allocate ioas"); -+ return ret; -+ } -+ -+ *ioas_id = alloc_data.out_ioas_id; -+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); -+ -+ return ret; -+} -+ -+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) -+{ -+ int ret, fd = be->fd; -+ struct iommu_destroy des = { -+ .size = sizeof(des), -+ .id = id, -+ }; -+ -+ ret = ioctl(fd, IOMMU_DESTROY, &des); -+ trace_iommufd_backend_free_id(fd, id, ret); -+ if (ret) { -+ error_report("Failed to free id: %u %m", id); -+ } -+} -+ -+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_map map = { -+ .size = sizeof(map), -+ .flags = IOMMU_IOAS_MAP_READABLE | -+ IOMMU_IOAS_MAP_FIXED_IOVA, -+ .ioas_id = ioas_id, -+ .__reserved = 0, -+ .user_va = (uintptr_t)vaddr, -+ .iova = iova, -+ .length = size, -+ }; -+ -+ if (!readonly) { -+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE; -+ } -+ -+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map); -+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, -+ vaddr, readonly, ret); -+ if (ret) { -+ ret = -errno; 
-+ -+ /* TODO: Not support mapping hardware PCI BAR region for now. */ -+ if (errno == EFAULT) { -+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); -+ } else { -+ error_report("IOMMU_IOAS_MAP failed: %m"); -+ } -+ } -+ return ret; -+} -+ -+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, -+ hwaddr iova, ram_addr_t size) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_unmap unmap = { -+ .size = sizeof(unmap), -+ .ioas_id = ioas_id, -+ .iova = iova, -+ .length = size, -+ }; -+ -+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); -+ /* -+ * IOMMUFD takes mapping as some kind of object, unmapping -+ * nonexistent mapping is treated as deleting a nonexistent -+ * object and return ENOENT. This is different from legacy -+ * backend which allows it. vIOMMU may trigger a lot of -+ * redundant unmapping, to avoid flush the log, treat them -+ * as succeess for IOMMUFD just like legacy backend. -+ */ -+ if (ret && errno == ENOENT) { -+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); -+ ret = 0; -+ } else { -+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); -+ } -+ -+ if (ret) { -+ ret = -errno; -+ error_report("IOMMU_IOAS_UNMAP failed: %m"); -+ } -+ return ret; -+} -+ -+static const TypeInfo iommufd_backend_info = { -+ .name = TYPE_IOMMUFD_BACKEND, -+ .parent = TYPE_OBJECT, -+ .instance_size = sizeof(IOMMUFDBackend), -+ .instance_init = iommufd_backend_init, -+ .instance_finalize = iommufd_backend_finalize, -+ .class_size = sizeof(IOMMUFDBackendClass), -+ .class_init = iommufd_backend_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_USER_CREATABLE }, -+ { } -+ } -+}; -+ -+static void register_types(void) -+{ -+ type_register_static(&iommufd_backend_info); -+} -+ -+type_init(register_types); -diff --git a/backends/meson.build b/backends/meson.build -index 914c7c4afb..9a5cea480d 100644 ---- a/backends/meson.build -+++ b/backends/meson.build -@@ -20,6 +20,7 @@ if have_vhost_user - system_ss.add(when: 
'CONFIG_VIRTIO', if_true: files('vhost-user.c')) - endif - system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) -+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) - if have_vhost_user_crypto - system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) - endif -diff --git a/backends/trace-events b/backends/trace-events -index 652eb76a57..d45c6e31a6 100644 ---- a/backends/trace-events -+++ b/backends/trace-events -@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) - dbus_vmstate_post_load(int version_id) "version_id: %d" - dbus_vmstate_loading(const char *id) "id: %s" - dbus_vmstate_saving(const char *id) "id: %s" -+ -+# iommufd.c -+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" -+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" -+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" -+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" -+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" -+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" -+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" -+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" -diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h -new file mode 100644 -index 0000000000..9c5524b0ed ---- /dev/null -+++ b/include/sysemu/iommufd.h -@@ -0,0 +1,38 @@ -+#ifndef SYSEMU_IOMMUFD_H -+#define SYSEMU_IOMMUFD_H -+ -+#include "qom/object.h" -+#include "qemu/thread.h" -+#include "exec/hwaddr.h" -+#include 
"exec/cpu-common.h" -+ -+#define TYPE_IOMMUFD_BACKEND "iommufd" -+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) -+ -+struct IOMMUFDBackendClass { -+ ObjectClass parent_class; -+}; -+ -+struct IOMMUFDBackend { -+ Object parent; -+ -+ /*< protected >*/ -+ int fd; /* /dev/iommu file descriptor */ -+ bool owned; /* is the /dev/iommu opened internally */ -+ QemuMutex lock; -+ uint32_t users; -+ -+ /*< public >*/ -+}; -+ -+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); -+void iommufd_backend_disconnect(IOMMUFDBackend *be); -+ -+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -+ Error **errp); -+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); -+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly); -+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, -+ hwaddr iova, ram_addr_t size); -+#endif -diff --git a/qapi/qom.json b/qapi/qom.json -index c53ef978ff..95516ba325 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -794,6 +794,23 @@ - { 'struct': 'VfioUserServerProperties', - 'data': { 'socket': 'SocketAddress', 'device': 'str' } } - -+## -+# @IOMMUFDProperties: -+# -+# Properties for iommufd objects. -+# -+# @fd: file descriptor name previously passed via 'getfd' command, -+# which represents a pre-opened /dev/iommu. This allows the -+# iommufd object to be shared accross several subsystems -+# (VFIO, VDPA, ...), and the file descriptor to be shared -+# with other process, e.g. DPDK. 
(default: QEMU opens -+# /dev/iommu by itself) -+# -+# Since: 9.0 -+## -+{ 'struct': 'IOMMUFDProperties', -+ 'data': { '*fd': 'str' } } -+ - ## - # @RngProperties: - # -@@ -934,6 +951,7 @@ - 'input-barrier', - { 'name': 'input-linux', - 'if': 'CONFIG_LINUX' }, -+ 'iommufd', - 'iothread', - 'main-loop', - { 'name': 'memory-backend-epc', -@@ -1003,6 +1021,7 @@ - 'input-barrier': 'InputBarrierProperties', - 'input-linux': { 'type': 'InputLinuxProperties', - 'if': 'CONFIG_LINUX' }, -+ 'iommufd': 'IOMMUFDProperties', - 'iothread': 'IothreadProperties', - 'main-loop': 'MainLoopProperties', - 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', -diff --git a/qemu-options.hx b/qemu-options.hx -index 557118cb1f..0814f43066 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5224,6 +5224,18 @@ SRST - - The ``share`` boolean option is on by default with memfd. - -+ ``-object iommufd,id=id[,fd=fd]`` -+ Creates an iommufd backend which allows control of DMA mapping -+ through the ``/dev/iommu`` device. -+ -+ The ``id`` parameter is a unique ID which frontends (such as -+ vfio-pci of vdpa) will use to connect with the iommufd backend. -+ -+ The ``fd`` parameter is an optional pre-opened file descriptor -+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared -+ across all subsystems, bringing the benefit of centralized -+ reference counting. -+ - ``-object rng-builtin,id=id`` - Creates a random number generator backend which obtains entropy - from QEMU builtin functions. 
The ``id`` parameter is a unique ID --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch deleted file mode 100644 index 5ee365b..0000000 --- a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch +++ /dev/null @@ -1,47 +0,0 @@ -From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Fri, 22 Dec 2023 08:55:23 +0100 -Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend - users -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm) - -QOM already has a ref count on objects and it will assert much -earlier, when INT_MAX is reached. 
- -Reviewed-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0) -Signed-off-by: Eric Auger ---- - backends/iommufd.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/backends/iommufd.c b/backends/iommufd.c -index ba58a0eb0d..393c0d9a37 100644 ---- a/backends/iommufd.c -+++ b/backends/iommufd.c -@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - int fd, ret = 0; - - qemu_mutex_lock(&be->lock); -- if (be->users == UINT32_MAX) { -- error_setg(errp, "too many connections"); -- ret = -E2BIG; -- goto out; -- } - if (be->owned && !be->users) { - fd = qemu_open_old("/dev/iommu", O_RDWR); - if (fd < 0) { --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch deleted file mode 100644 index 83878d5..0000000 --- a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Thu, 21 Dec 2023 16:58:41 +0100 -Subject: [PATCH 065/101] backends/iommufd: Remove mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm) - -Coverity reports a concurrent data access violation because be->users -is being accessed in iommufd_backend_can_be_deleted() without holding -the mutex. - -However, these routines are called from the QEMU main thread when a -device is created. In this case, the code paths should be protected by -the BQL lock and it should be safe to drop the IOMMUFD backend mutex. -Simply remove it. 
- -Fixes: CID 1531550 -Fixes: CID 1531549 -Reviewed-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451) -Signed-off-by: Eric Auger ---- - backends/iommufd.c | 7 ------- - include/sysemu/iommufd.h | 2 -- - 2 files changed, 9 deletions(-) - -diff --git a/backends/iommufd.c b/backends/iommufd.c -index 393c0d9a37..1ef683c7b0 100644 ---- a/backends/iommufd.c -+++ b/backends/iommufd.c -@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj) - be->fd = -1; - be->users = 0; - be->owned = true; -- qemu_mutex_init(&be->lock); - } - - static void iommufd_backend_finalize(Object *obj) -@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } -- qemu_mutex_lock(&be->lock); - be->fd = fd; - be->owned = false; -- qemu_mutex_unlock(&be->lock); - trace_iommu_backend_set_fd(be->fd); - } - -@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - { - int fd, ret = 0; - -- qemu_mutex_lock(&be->lock); - if (be->owned && !be->users) { - fd = qemu_open_old("/dev/iommu", O_RDWR); - if (fd < 0) { -@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - out: - trace_iommufd_backend_connect(be->fd, be->owned, - be->users, ret); -- qemu_mutex_unlock(&be->lock); - return ret; - } - - void iommufd_backend_disconnect(IOMMUFDBackend *be) - { -- qemu_mutex_lock(&be->lock); - if (!be->users) { - goto out; - } -@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) - } - out: - trace_iommufd_backend_disconnect(be->fd, be->users); -- qemu_mutex_unlock(&be->lock); - } - - int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h -index 9c5524b0ed..9af27ebd6c 100644 ---- a/include/sysemu/iommufd.h -+++ b/include/sysemu/iommufd.h -@@ 
-2,7 +2,6 @@ - #define SYSEMU_IOMMUFD_H - - #include "qom/object.h" --#include "qemu/thread.h" - #include "exec/hwaddr.h" - #include "exec/cpu-common.h" - -@@ -19,7 +18,6 @@ struct IOMMUFDBackend { - /*< protected >*/ - int fd; /* /dev/iommu file descriptor */ - bool owned; /* is the /dev/iommu opened internally */ -- QemuMutex lock; - uint32_t users; - - /*< public >*/ --- -2.39.3 - diff --git a/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch index 5ea7203..8db6199 100644 --- a/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch +++ b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch @@ -1,60 +1,52 @@ -From 5c35b7d631e9cdf75512b9e1a0b5d48e8fd768d9 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 5 Jun 2024 19:56:51 -0400 +From 2ee645a339e9ef9cd92620a8b784d18d512326be Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:56:02 +0200 Subject: [PATCH 4/4] block: Parse filenames only when explicitly requested -RH-Author: Jon Maloy -RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) -RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 RH-CVE: CVE-2024-4467 RH-Acked-by: Kevin Wolf RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [4/4] 6f71e6a07bd5a9f8352db920f498f5fa5a2cdbfb +RH-Acked-by: Eric Blake +RH-Commit: [4/4] f44c2941d4419e60f16dea3e9adca164e75aa78d -commit f44c2941d4419e60f16dea3e9adca164e75aa78d (origin/cve-2024-4467-hreitz-rhel-9.5.0) -Author: Kevin Wolf -Date: Thu Apr 25 14:56:02 2024 +0200 +When handling image filenames from legacy options such as -drive or from +tools, these filenames are parsed for protocol prefixes, including for +the json:{} pseudo-protocol. 
- block: Parse filenames only when explicitly requested +This behaviour is intended for filenames that come directly from the +command line and for backing files, which may come from the image file +itself. Higher level management tools generally take care to verify that +untrusted images don't contain a bad (or any) backing file reference; +'qemu-img info' is a suitable tool for this. - When handling image filenames from legacy options such as -drive or from - tools, these filenames are parsed for protocol prefixes, including for - the json:{} pseudo-protocol. +However, for other files that can be referenced in images, such as +qcow2 data files or VMDK extents, the string from the image file is +usually not verified by management tools - and 'qemu-img info' wouldn't +be suitable because in contrast to backing files, it already opens these +other referenced files. So here the string should be interpreted as a +literal local filename. More complex configurations need to be specified +explicitly on the command line or in QMP. - This behaviour is intended for filenames that come directly from the - command line and for backing files, which may come from the image file - itself. Higher level management tools generally take care to verify that - untrusted images don't contain a bad (or any) backing file reference; - 'qemu-img info' is a suitable tool for this. +This patch changes bdrv_open_inherit() so that it only parses filenames +if a new parameter parse_filename is true. It is set for the top level +in bdrv_open(), for the file child and for the backing file child. All +other callers pass false and disable filename parsing this way. - However, for other files that can be referenced in images, such as - qcow2 data files or VMDK extents, the string from the image file is - usually not verified by management tools - and 'qemu-img info' wouldn't - be suitable because in contrast to backing files, it already opens these - other referenced files. 
So here the string should be interpreted as a - literal local filename. More complex configurations need to be specified - explicitly on the command line or in QMP. - - This patch changes bdrv_open_inherit() so that it only parses filenames - if a new parameter parse_filename is true. It is set for the top level - in bdrv_open(), for the file child and for the backing file child. All - other callers pass false and disable filename parsing this way. - - Signed-off-by: Kevin Wolf - Reviewed-by: Eric Blake - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Hanna Czenczek - Upstream: N/A, embargoed - Signed-off-by: Hanna Czenczek - -Signed-off-by: Jon Maloy +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek --- block.c | 90 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/block.c b/block.c -index a097772238..8b6aa4a65c 100644 +index 468cf5e67d..50bdd197b7 100644 --- a/block.c +++ b/block.c @@ -86,6 +86,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, @@ -65,7 +57,7 @@ index a097772238..8b6aa4a65c 100644 Error **errp); static bool bdrv_recurse_has_child(BlockDriverState *bs, -@@ -2035,7 +2036,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, +@@ -2058,7 +2059,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, * block driver has been specified explicitly. 
*/ static int bdrv_fill_options(QDict **options, const char *filename, @@ -75,7 +67,7 @@ index a097772238..8b6aa4a65c 100644 { const char *drvname; bool protocol = *flags & BDRV_O_PROTOCOL; -@@ -2077,7 +2079,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, +@@ -2100,7 +2102,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, if (protocol && filename) { if (!qdict_haskey(*options, "filename")) { qdict_put_str(*options, "filename", filename); @@ -84,7 +76,7 @@ index a097772238..8b6aa4a65c 100644 } else { error_setg(errp, "Can't specify 'file' and 'filename' options at " "the same time"); -@@ -3639,7 +3641,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, +@@ -3663,7 +3665,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, } backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, @@ -94,7 +86,7 @@ index a097772238..8b6aa4a65c 100644 if (!backing_hd) { bs->open_flags |= BDRV_O_NO_BACKING; error_prepend(errp, "Could not open backing file: "); -@@ -3673,7 +3676,8 @@ free_exit: +@@ -3697,7 +3700,8 @@ free_exit: static BlockDriverState * bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, BlockDriverState *parent, const BdrvChildClass *child_class, @@ -104,7 +96,7 @@ index a097772238..8b6aa4a65c 100644 { BlockDriverState *bs = NULL; QDict *image_options; -@@ -3704,7 +3708,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, +@@ -3728,7 +3732,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, } bs = bdrv_open_inherit(filename, reference, image_options, 0, @@ -114,7 +106,7 @@ index a097772238..8b6aa4a65c 100644 if (!bs) { goto done; } -@@ -3714,6 +3719,33 @@ done: +@@ -3738,6 +3743,33 @@ done: return bs; } @@ -148,7 +140,7 @@ index a097772238..8b6aa4a65c 100644 /* * Opens a disk image whose options are given as BlockdevRef in another block * device's options. 
-@@ -3737,27 +3769,15 @@ BdrvChild *bdrv_open_child(const char *filename, +@@ -3761,27 +3793,15 @@ BdrvChild *bdrv_open_child(const char *filename, BdrvChildRole child_role, bool allow_none, Error **errp) { @@ -182,7 +174,7 @@ index a097772238..8b6aa4a65c 100644 * * @parent can move to a different AioContext in this function. */ -@@ -3772,8 +3792,8 @@ int bdrv_open_file_child(const char *filename, +@@ -3796,8 +3816,8 @@ int bdrv_open_file_child(const char *filename, role = parent->drv->is_filter ? (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; @@ -193,7 +185,7 @@ index a097772238..8b6aa4a65c 100644 { return -EINVAL; } -@@ -3818,7 +3838,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) +@@ -3842,7 +3862,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) } @@ -203,7 +195,7 @@ index a097772238..8b6aa4a65c 100644 obj = NULL; qobject_unref(obj); visit_free(v); -@@ -3907,7 +3928,7 @@ static BlockDriverState * no_coroutine_fn +@@ -3932,7 +3953,7 @@ static BlockDriverState * no_coroutine_fn bdrv_open_inherit(const char *filename, const char *reference, QDict *options, int flags, BlockDriverState *parent, const BdrvChildClass *child_class, BdrvChildRole child_role, @@ -212,7 +204,7 @@ index a097772238..8b6aa4a65c 100644 { int ret; BlockBackend *file = NULL; -@@ -3955,9 +3976,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, +@@ -3980,9 +4001,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, } /* json: syntax counts as explicit options, as if in the QDict */ @@ -227,7 +219,7 @@ index a097772238..8b6aa4a65c 100644 } bs->explicit_options = qdict_clone_shallow(options); -@@ -3982,7 +4005,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, +@@ -4007,7 +4030,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, parent->open_flags, parent->options); } @@ -237,7 +229,7 @@ index 
a097772238..8b6aa4a65c 100644 if (ret < 0) { goto fail; } -@@ -4051,7 +4075,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, +@@ -4076,7 +4100,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, file_bs = bdrv_open_child_bs(filename, options, "file", bs, &child_of_bds, BDRV_CHILD_IMAGE, @@ -246,7 +238,7 @@ index a097772238..8b6aa4a65c 100644 if (local_err) { goto fail; } -@@ -4200,7 +4224,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, +@@ -4225,7 +4249,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, GLOBAL_STATE_CODE(); return bdrv_open_inherit(filename, reference, options, flags, NULL, diff --git a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch deleted file mode 100644 index 155fa19..0000000 --- a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch +++ /dev/null @@ -1,104 +0,0 @@ -From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 15:47:54 +0100 -Subject: [PATCH 03/22] block-backend: Allow concurrent context changes - -RH-Author: Hanna Czenczek -RH-MergeRequest: 222: Allow concurrent BlockBackend context changes -RH-Jira: RHEL-24593 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s) - -Since AioContext locks have been removed, a BlockBackend's AioContext -may really change at any time (only exception is that it is often -confined to a drained section, as noted in this patch). Therefore, -blk_get_aio_context() cannot rely on its root node's context always -matching that of the BlockBackend. 
- -In practice, whether they match does not matter anymore anyway: Requests -can be sent to BDSs from any context, so anyone who requests the BB's -context should have no reason to require the root node to have the same -context. Therefore, we can and should remove the assertion to that -effect. - -In addition, because the context can be set and queried from different -threads concurrently, it has to be accessed with atomic operations. - -Buglink: https://issues.redhat.com/browse/RHEL-19381 -Suggested-by: Kevin Wolf -Signed-off-by: Hanna Czenczek -Message-ID: <20240202144755.671354-2-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111) ---- - block/block-backend.c | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 209eb07528..9c4de79e6b 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -44,7 +44,7 @@ struct BlockBackend { - char *name; - int refcnt; - BdrvChild *root; -- AioContext *ctx; -+ AioContext *ctx; /* access with atomic operations only */ - DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ - QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ - QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ -@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) - } - } - -+/** -+ * Return BB's current AioContext. Note that this context may change -+ * concurrently at any time, with one exception: If the BB has a root node -+ * attached, its context will only change through bdrv_try_change_aio_context(), -+ * which creates a drained section. Therefore, incrementing such a BB's -+ * in-flight counter will prevent its context from changing. 
-+ */ - AioContext *blk_get_aio_context(BlockBackend *blk) - { -- BlockDriverState *bs; - IO_CODE(); - - if (!blk) { - return qemu_get_aio_context(); - } - -- bs = blk_bs(blk); -- if (bs) { -- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); -- assert(ctx == blk->ctx); -- } -- -- return blk->ctx; -+ return qatomic_read(&blk->ctx); - } - - int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, -@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, - GLOBAL_STATE_CODE(); - - if (!bs) { -- blk->ctx = new_context; -+ qatomic_set(&blk->ctx, new_context); - return 0; - } - -@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque) - AioContext *new_context = s->new_ctx; - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - -- blk->ctx = new_context; -+ qatomic_set(&blk->ctx, new_context); - if (tgm->throttle_state) { - throttle_group_detach_aio_context(tgm); - throttle_group_attach_aio_context(tgm, new_context); --- -2.39.3 - diff --git a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch deleted file mode 100644 index df764fb..0000000 --- a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 12 Sep 2023 19:10:37 -0400 -Subject: [PATCH 095/101] block-coroutine-wrapper: use - qemu_get_current_aio_context() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm) - -Use qemu_get_current_aio_context() in mixed wrappers and coroutine -wrappers so that code runs in the caller's AioContext instead of moving -to the BlockDriverState's AioContext. 
This change is necessary for the -multi-queue block layer where any thread can call into the block layer. - -Most wrappers are IO_CODE where it's safe to use the current AioContext -nowadays. BlockDrivers and the core block layer use their own locks and -no longer depend on the AioContext lock for thread-safety. - -The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current -AioContext is safe because this code is only called with the BQL held -from the main loop thread. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230912231037.826804-6-stefanha@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - scripts/block-coroutine-wrapper.py | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index c9c09fcacd..dbbde99e39 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str, - f"{self.name}") - self.target_name = f'{subsystem}_{subname}' - -- self.ctx = self.gen_ctx() -- - self.get_result = 's->ret = ' - self.ret = 'return s.ret;' - self.co_ret = 'return ' -@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str: - {func.co_ret}{name}({ func.gen_list('{name}') }); - }} else {{ - {struct_name} s = {{ -- .poll_state.ctx = {func.ctx}, -+ .poll_state.ctx = qemu_get_current_aio_context(), - .poll_state.in_progress = true, - - { func.gen_block(' .{name} = {name},') } -@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str: - {func.return_type} {func.name}({ func.gen_list('{decl}') }) - {{ - {struct_name} s = {{ -- .poll_state.ctx = {func.ctx}, -+ .poll_state.ctx = qemu_get_current_aio_context(), - .poll_state.in_progress = true, - - { func.gen_block(' .{name} = {name},') } --- -2.39.3 - diff --git a/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch 
b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch new file mode 100644 index 0000000..c2b9c47 --- /dev/null +++ b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch @@ -0,0 +1,330 @@ +From a67edfb4b591acdffc5b4987601a30224376996f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:50 -0400 +Subject: [PATCH 4/5] block/crypto: create ciphers on demand +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] 22a4c87fef774cad98a6f5a79f27df50a208013d (stefanha/centos-stream-qemu-kvm) + +Ciphers are pre-allocated by qcrypto_block_init_cipher() depending on +the given number of threads. The -device +virtio-blk-pci,iothread-vq-mapping= feature allows users to assign +multiple IOThreads to a virtio-blk device, but the association between +the virtio-blk device and the block driver happens after the block +driver is already open. + +When the number of threads given to qcrypto_block_init_cipher() is +smaller than the actual number of threads at runtime, the +block->n_free_ciphers > 0 assertion in qcrypto_block_pop_cipher() can +fail. + +Get rid of qcrypto_block_init_cipher() n_thread's argument and allocate +ciphers on demand. + +Reported-by: Qing Wang +Buglink: https://issues.redhat.com/browse/RHEL-36159 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit af206c284e4c1b17cdfb0f17e898b288c0fc1751) +Signed-off-by: Stefan Hajnoczi +--- + crypto/block-luks.c | 3 +- + crypto/block-qcow.c | 2 +- + crypto/block.c | 111 ++++++++++++++++++++++++++------------------ + crypto/blockpriv.h | 12 +++-- + 4 files changed, 78 insertions(+), 50 deletions(-) + +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3ee928fb5a..3357852c0a 100644 +--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1262,7 +1262,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + luks->cipher_mode, + masterkey, + luks->header.master_key_len, +- n_threads, + errp) < 0) { + goto fail; + } +@@ -1456,7 +1455,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, + /* Setup the block device payload encryption objects */ + if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg, + luks_opts.cipher_mode, masterkey, +- luks->header.master_key_len, 1, errp) < 0) { ++ luks->header.master_key_len, errp) < 0) { + goto error; + } + +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 4d7cf36a8f..02305058e3 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -75,7 +75,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block, + ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC, + keybuf, G_N_ELEMENTS(keybuf), +- n_threads, errp); ++ errp); + if (ret < 0) { + ret = -ENOTSUP; + goto fail; +diff --git a/crypto/block.c b/crypto/block.c +index 506ea1d1a3..ba6d1cebc7 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -20,6 +20,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qemu/lockable.h" + #include "blockpriv.h" + #include "block-qcow.h" + #include "block-luks.h" +@@ -57,6 +58,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= 
G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -76,8 +79,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -92,6 +93,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -111,8 +114,6 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -227,37 +228,42 @@ QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block) + * This function is used only in test with one thread (it's safe to skip + * pop/push interface), so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); +- return block->ciphers ? block->ciphers[0] : NULL; ++ assert(block->max_free_ciphers <= 1); ++ return block->free_ciphers ? 
block->free_ciphers[0] : NULL; + } + + +-static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block) ++static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block, ++ Error **errp) + { +- QCryptoCipher *cipher; +- +- qemu_mutex_lock(&block->mutex); +- +- assert(block->n_free_ciphers > 0); +- block->n_free_ciphers--; +- cipher = block->ciphers[block->n_free_ciphers]; +- +- qemu_mutex_unlock(&block->mutex); ++ /* Usually there is a free cipher available */ ++ WITH_QEMU_LOCK_GUARD(&block->mutex) { ++ if (block->n_free_ciphers > 0) { ++ block->n_free_ciphers--; ++ return block->free_ciphers[block->n_free_ciphers]; ++ } ++ } + +- return cipher; ++ /* Otherwise allocate a new cipher */ ++ return qcrypto_cipher_new(block->alg, block->mode, block->key, ++ block->nkey, errp); + } + + + static void qcrypto_block_push_cipher(QCryptoBlock *block, + QCryptoCipher *cipher) + { +- qemu_mutex_lock(&block->mutex); ++ QEMU_LOCK_GUARD(&block->mutex); + +- assert(block->n_free_ciphers < block->n_ciphers); +- block->ciphers[block->n_free_ciphers] = cipher; +- block->n_free_ciphers++; ++ if (block->n_free_ciphers == block->max_free_ciphers) { ++ block->max_free_ciphers++; ++ block->free_ciphers = g_renew(QCryptoCipher *, ++ block->free_ciphers, ++ block->max_free_ciphers); ++ } + +- qemu_mutex_unlock(&block->mutex); ++ block->free_ciphers[block->n_free_ciphers] = cipher; ++ block->n_free_ciphers++; + } + + +@@ -265,24 +271,31 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp) ++ Error **errp) + { +- size_t i; ++ QCryptoCipher *cipher; + +- assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers); ++ assert(!block->free_ciphers && !block->max_free_ciphers && ++ !block->n_free_ciphers); + +- block->ciphers = g_new0(QCryptoCipher *, n_threads); ++ /* Stash away cipher parameters for qcrypto_block_pop_cipher() */ ++ block->alg = alg; 
++ block->mode = mode; ++ block->key = g_memdup2(key, nkey); ++ block->nkey = nkey; + +- for (i = 0; i < n_threads; i++) { +- block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp); +- if (!block->ciphers[i]) { +- qcrypto_block_free_cipher(block); +- return -1; +- } +- block->n_ciphers++; +- block->n_free_ciphers++; ++ /* ++ * Create a new cipher to validate the parameters now. This reduces the ++ * chance of cipher creation failing at I/O time. ++ */ ++ cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ g_free(block->key); ++ block->key = NULL; ++ return -1; + } + ++ qcrypto_block_push_cipher(block, cipher); + return 0; + } + +@@ -291,19 +304,23 @@ void qcrypto_block_free_cipher(QCryptoBlock *block) + { + size_t i; + +- if (!block->ciphers) { ++ g_free(block->key); ++ block->key = NULL; ++ ++ if (!block->free_ciphers) { + return; + } + +- assert(block->n_ciphers == block->n_free_ciphers); ++ /* All popped ciphers were eventually pushed back */ ++ assert(block->n_free_ciphers == block->max_free_ciphers); + +- for (i = 0; i < block->n_ciphers; i++) { +- qcrypto_cipher_free(block->ciphers[i]); ++ for (i = 0; i < block->max_free_ciphers; i++) { ++ qcrypto_cipher_free(block->free_ciphers[i]); + } + +- g_free(block->ciphers); +- block->ciphers = NULL; +- block->n_ciphers = block->n_free_ciphers = 0; ++ g_free(block->free_ciphers); ++ block->free_ciphers = NULL; ++ block->max_free_ciphers = block->n_free_ciphers = 0; + } + + QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) +@@ -311,7 +328,7 @@ QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) + /* ivgen should be accessed under mutex. 
However, this function is used only + * in test with one thread, so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); ++ assert(block->max_free_ciphers <= 1); + return block->ivgen; + } + +@@ -446,7 +463,10 @@ int qcrypto_block_decrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +@@ -465,7 +485,10 @@ int qcrypto_block_encrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 836f3b4726..4bf6043d5d 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -32,8 +32,14 @@ struct QCryptoBlock { + const QCryptoBlockDriver *driver; + void *opaque; + +- QCryptoCipher **ciphers; +- size_t n_ciphers; ++ /* Cipher parameters */ ++ QCryptoCipherAlgorithm alg; ++ QCryptoCipherMode mode; ++ uint8_t *key; ++ size_t nkey; ++ ++ QCryptoCipher **free_ciphers; ++ size_t max_free_ciphers; + size_t n_free_ciphers; + QCryptoIVGen *ivgen; + QemuMutex mutex; +@@ -130,7 +136,7 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp); ++ Error **errp); + + void qcrypto_block_free_cipher(QCryptoBlock *block); + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch deleted file mode 100644 index 
1783a64..0000000 --- a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:00:58 -0400 -Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in - the current thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm) - -The file-posix block driver currently only sets up Linux AIO and -io_uring in the BDS's AioContext. In the multi-queue block layer we must -be able to submit I/O requests in AioContexts that do not have Linux AIO -and io_uring set up yet since any thread can call into the block driver. - -Set up Linux AIO and io_uring for the current AioContext during request -submission. We lose the ability to return an error from -.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to -resource limits). Instead the user only gets warnings and we fall back -to aio=threads. This is still better than a fatal error after startup. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 103 ++++++++++++++++++++++----------------------- - 1 file changed, 51 insertions(+), 52 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index b862406c71..35684f7e21 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - - #ifdef CONFIG_LINUX_AIO - /* Currently Linux does AIO only for files opened with O_DIRECT */ -- if (s->use_linux_aio) { -- if (!(s->open_flags & O_DIRECT)) { -- error_setg(errp, "aio=native was specified, but it requires " -- "cache.direct=on, which was not specified."); -- ret = -EINVAL; -- goto fail; -- } -- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { -- error_prepend(errp, "Unable to use native AIO: "); -- goto fail; -- } -+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { -+ error_setg(errp, "aio=native was specified, but it requires " -+ "cache.direct=on, which was not specified."); -+ ret = -EINVAL; -+ goto fail; - } - #else - if (s->use_linux_aio) { -@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - } - #endif /* !defined(CONFIG_LINUX_AIO) */ - --#ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { -- error_prepend(errp, "Unable to use io_uring: "); -- goto fail; -- } -- } --#else -+#ifndef CONFIG_LINUX_IO_URING - if (s->use_linux_io_uring) { - error_setg(errp, "aio=io_uring was specified, but is not supported " - "in this build."); -@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) - return true; - } - -+#ifdef CONFIG_LINUX_IO_URING -+static inline bool raw_check_linux_io_uring(BDRVRawState *s) -+{ -+ Error *local_err = NULL; -+ AioContext *ctx; -+ 
-+ if (!s->use_linux_io_uring) { -+ return false; -+ } -+ -+ ctx = qemu_get_current_aio_context(); -+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) { -+ error_reportf_err(local_err, "Unable to use linux io_uring, " -+ "falling back to thread pool: "); -+ s->use_linux_io_uring = false; -+ return false; -+ } -+ return true; -+} -+#endif -+ -+#ifdef CONFIG_LINUX_AIO -+static inline bool raw_check_linux_aio(BDRVRawState *s) -+{ -+ Error *local_err = NULL; -+ AioContext *ctx; -+ -+ if (!s->use_linux_aio) { -+ return false; -+ } -+ -+ ctx = qemu_get_current_aio_context(); -+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) { -+ error_reportf_err(local_err, "Unable to use Linux AIO, " -+ "falling back to thread pool: "); -+ s->use_linux_aio = false; -+ return false; -+ } -+ return true; -+} -+#endif -+ - static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - uint64_t bytes, QEMUIOVector *qiov, int type) - { -@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { - type |= QEMU_AIO_MISALIGNED; - #ifdef CONFIG_LINUX_IO_URING -- } else if (s->use_linux_io_uring) { -+ } else if (raw_check_linux_io_uring(s)) { - assert(qiov->size == bytes); - ret = luring_co_submit(bs, s->fd, offset, qiov, type); - goto out; - #endif - #ifdef CONFIG_LINUX_AIO -- } else if (s->use_linux_aio) { -+ } else if (raw_check_linux_aio(s)) { - assert(qiov->size == bytes); - ret = laio_co_submit(s->fd, offset, qiov, type, - s->aio_max_batch); -@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) - }; - - #ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -+ if (raw_check_linux_io_uring(s)) { - return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); - } - #endif - return raw_thread_pool_submit(handle_aiocb_flush, &acb); - } - --static void raw_aio_attach_aio_context(BlockDriverState *bs, -- AioContext 
*new_context) --{ -- BDRVRawState __attribute__((unused)) *s = bs->opaque; --#ifdef CONFIG_LINUX_AIO -- if (s->use_linux_aio) { -- Error *local_err = NULL; -- if (!aio_setup_linux_aio(new_context, &local_err)) { -- error_reportf_err(local_err, "Unable to use native AIO, " -- "falling back to thread pool: "); -- s->use_linux_aio = false; -- } -- } --#endif --#ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -- Error *local_err = NULL; -- if (!aio_setup_linux_io_uring(new_context, &local_err)) { -- error_reportf_err(local_err, "Unable to use linux io_uring, " -- "falling back to thread pool: "); -- s->use_linux_io_uring = false; -- } -- } --#endif --} -- - static void raw_close(BlockDriverState *bs) - { - BDRVRawState *s = bs->opaque; -@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = { - .bdrv_co_copy_range_from = raw_co_copy_range_from, - .bdrv_co_copy_range_to = raw_co_copy_range_to, - .bdrv_refresh_limits = raw_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = { - .bdrv_co_copy_range_from = raw_co_copy_range_from, - .bdrv_co_copy_range_to = raw_co_copy_range_to, - .bdrv_refresh_limits = raw_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = cdrom_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = cdrom_refresh_limits, -- .bdrv_attach_aio_context = 
raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-AioContext-locking.patch b/SOURCES/kvm-block-remove-AioContext-locking.patch deleted file mode 100644 index 5bcd859..0000000 --- a/SOURCES/kvm-block-remove-AioContext-locking.patch +++ /dev/null @@ -1,4438 +0,0 @@ -From df1400991580e8a60d711079865b56ed95830b28 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:03 -0500 -Subject: [PATCH 086/101] block: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [17/26] b29c3ac7ea91ca356335ba047c66187317c482f9 (kmwolf/centos-qemu-kvm) - -This is the big patch that removes -aio_context_acquire()/aio_context_release() from the block layer and -affected block layer users. - -There isn't a clean way to split this patch and the reviewers are likely -the same group of people, so I decided to do it in one patch. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Reviewed-by: Paul Durrant -Message-ID: <20231205182011.1976568-7-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - block.c | 234 +--------------------- - block/block-backend.c | 14 -- - block/copy-before-write.c | 22 +-- - block/export/export.c | 22 +-- - block/io.c | 45 +---- - block/mirror.c | 19 -- - block/monitor/bitmap-qmp-cmds.c | 20 +- - block/monitor/block-hmp-cmds.c | 29 --- - block/qapi-sysemu.c | 27 +-- - block/qapi.c | 18 +- - block/raw-format.c | 5 - - block/replication.c | 58 +----- - block/snapshot.c | 22 +-- - block/write-threshold.c | 6 - - blockdev.c | 307 +++++------------------------ - blockjob.c | 18 -- - hw/block/dataplane/virtio-blk.c | 10 - - hw/block/dataplane/xen-block.c | 17 +- - hw/block/virtio-blk.c | 13 -- - hw/core/qdev-properties-system.c | 9 - - include/block/block-global-state.h | 9 +- - include/block/block-io.h | 3 +- - include/block/snapshot.h | 2 - - job.c | 16 -- - migration/block.c | 34 +--- - migration/migration-hmp-cmds.c | 3 - - migration/savevm.c | 22 --- - net/colo-compare.c | 2 - - qemu-img.c | 4 - - qemu-io.c | 10 +- - qemu-nbd.c | 2 - - replay/replay-debugging.c | 4 - - scripts/block-coroutine-wrapper.py | 3 - - tests/tsan/suppressions.tsan | 1 - - tests/unit/test-bdrv-drain.c | 51 +---- - tests/unit/test-bdrv-graph-mod.c | 6 - - tests/unit/test-block-iothread.c | 31 --- - tests/unit/test-blockjob.c | 137 ------------- - tests/unit/test-replication.c | 11 -- - util/async.c | 4 - - util/vhost-user-server.c | 3 - - 41 files changed, 104 insertions(+), 1169 deletions(-) - -diff --git a/block.c b/block.c -index 25e1ebc606..91ace5d2d5 100644 ---- a/block.c -+++ b/block.c -@@ -1625,7 +1625,6 @@ static int no_coroutine_fn GRAPH_UNLOCKED - bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - QDict *options, int open_flags, Error **errp) - { -- AioContext *ctx; - Error *local_err = NULL; - int i, ret; - 
GLOBAL_STATE_CODE(); -@@ -1673,21 +1672,15 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; - bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; - -- /* Get the context after .bdrv_open, it can change the context */ -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not refresh total sector count"); -- aio_context_release(ctx); - return ret; - } - - bdrv_graph_rdlock_main_loop(); - bdrv_refresh_limits(bs, NULL, &local_err); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(ctx); - - if (local_err) { - error_propagate(errp, local_err); -@@ -3062,7 +3055,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - Transaction *tran, Error **errp) - { - BdrvChild *new_child; -- AioContext *parent_ctx, *new_child_ctx; -+ AioContext *parent_ctx; - AioContext *child_ctx = bdrv_get_aio_context(child_bs); - - assert(child_class->get_parent_desc); -@@ -3114,12 +3107,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - } - } - -- new_child_ctx = bdrv_get_aio_context(child_bs); -- if (new_child_ctx != child_ctx) { -- aio_context_release(child_ctx); -- aio_context_acquire(new_child_ctx); -- } -- - bdrv_ref(child_bs); - /* - * Let every new BdrvChild start with a drained parent. 
Inserting the child -@@ -3149,11 +3136,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - }; - tran_add(tran, &bdrv_attach_child_common_drv, s); - -- if (new_child_ctx != child_ctx) { -- aio_context_release(new_child_ctx); -- aio_context_acquire(child_ctx); -- } -- - return new_child; - } - -@@ -3605,7 +3587,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - int ret = 0; - bool implicit_backing = false; - BlockDriverState *backing_hd; -- AioContext *backing_hd_ctx; - QDict *options; - QDict *tmp_parent_options = NULL; - Error *local_err = NULL; -@@ -3691,11 +3672,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - - /* Hook up the backing file link; drop our reference, bs owns the - * backing_hd reference now */ -- backing_hd_ctx = bdrv_get_aio_context(backing_hd); -- aio_context_acquire(backing_hd_ctx); - ret = bdrv_set_backing_hd(bs, backing_hd, errp); - bdrv_unref(backing_hd); -- aio_context_release(backing_hd_ctx); - - if (ret < 0) { - goto free_exit; -@@ -3780,7 +3758,6 @@ BdrvChild *bdrv_open_child(const char *filename, - { - BlockDriverState *bs; - BdrvChild *child; -- AioContext *ctx; - - GLOBAL_STATE_CODE(); - -@@ -3791,11 +3768,8 @@ BdrvChild *bdrv_open_child(const char *filename, - } - - bdrv_graph_wrlock(); -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); -- aio_context_release(ctx); - bdrv_graph_wrunlock(); - - return child; -@@ -3881,7 +3855,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - int64_t total_size; - QemuOpts *opts = NULL; - BlockDriverState *bs_snapshot = NULL; -- AioContext *ctx = bdrv_get_aio_context(bs); - int ret; - - GLOBAL_STATE_CODE(); -@@ -3890,9 +3863,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - instead of opening 'filename' directly */ - - /* Get the required size from the image */ -- 
aio_context_acquire(ctx); - total_size = bdrv_getlength(bs); -- aio_context_release(ctx); - - if (total_size < 0) { - error_setg_errno(errp, -total_size, "Could not get image size"); -@@ -3927,10 +3898,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - goto out; - } - -- aio_context_acquire(ctx); - ret = bdrv_append(bs_snapshot, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - bs_snapshot = NULL; - goto out; -@@ -3974,7 +3942,6 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - Error *local_err = NULL; - QDict *snapshot_options = NULL; - int snapshot_flags = 0; -- AioContext *ctx = qemu_get_aio_context(); - - assert(!child_class || !flags); - assert(!child_class == !parent); -@@ -4115,12 +4082,10 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - /* Not requesting BLK_PERM_CONSISTENT_READ because we're only - * looking at the header to guess the image format. This works even - * in cases where a guest would not see a consistent state. */ -- ctx = bdrv_get_aio_context(file_bs); -- aio_context_acquire(ctx); -+ AioContext *ctx = bdrv_get_aio_context(file_bs); - file = blk_new(ctx, 0, BLK_PERM_ALL); - blk_insert_bs(file, file_bs, &local_err); - bdrv_unref(file_bs); -- aio_context_release(ctx); - - if (local_err) { - goto fail; -@@ -4167,13 +4132,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - goto fail; - } - -- /* The AioContext could have changed during bdrv_open_common() */ -- ctx = bdrv_get_aio_context(bs); -- - if (file) { -- aio_context_acquire(ctx); - blk_unref(file); -- aio_context_release(ctx); - file = NULL; - } - -@@ -4231,16 +4191,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - * (snapshot_bs); thus, we have to drop the strong reference to bs - * (which we obtained by calling bdrv_new()). bs will not be deleted, - * though, because the overlay still has a reference to it. 
*/ -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - bs = snapshot_bs; - } - - return bs; - - fail: -- aio_context_acquire(ctx); - blk_unref(file); - qobject_unref(snapshot_options); - qobject_unref(bs->explicit_options); -@@ -4249,14 +4206,11 @@ fail: - bs->options = NULL; - bs->explicit_options = NULL; - bdrv_unref(bs); -- aio_context_release(ctx); - error_propagate(errp, local_err); - return NULL; - - close_and_fail: -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - qobject_unref(snapshot_options); - qobject_unref(options); - error_propagate(errp, local_err); -@@ -4540,12 +4494,7 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -- AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); -- -- aio_context_acquire(ctx); - bdrv_drained_end(bs_entry->state.bs); -- aio_context_release(ctx); -- - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); -@@ -4577,7 +4526,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - { - int ret = -1; - BlockReopenQueueEntry *bs_entry, *next; -- AioContext *ctx; - Transaction *tran = tran_new(); - g_autoptr(GSList) refresh_list = NULL; - -@@ -4586,10 +4534,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - GLOBAL_STATE_CODE(); - - QTAILQ_FOREACH(bs_entry, bs_queue, entry) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - ret = bdrv_flush(bs_entry->state.bs); -- aio_context_release(ctx); - if (ret < 0) { - error_setg_errno(errp, -ret, "Error flushing drive"); - goto abort; -@@ -4598,10 +4543,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - - QTAILQ_FOREACH(bs_entry, bs_queue, entry) { - assert(bs_entry->state.bs->quiesce_counter > 0); -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- 
aio_context_acquire(ctx); - ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); -- aio_context_release(ctx); - if (ret < 0) { - goto abort; - } -@@ -4644,10 +4586,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - * to first element. - */ - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - bdrv_reopen_commit(&bs_entry->state); -- aio_context_release(ctx); - } - - bdrv_graph_wrlock(); -@@ -4658,10 +4597,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - BlockDriverState *bs = bs_entry->state.bs; - - if (bs->drv->bdrv_reopen_commit_post) { -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - bs->drv->bdrv_reopen_commit_post(&bs_entry->state); -- aio_context_release(ctx); - } - } - -@@ -4675,10 +4611,7 @@ abort: - - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (bs_entry->prepared) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - bdrv_reopen_abort(&bs_entry->state); -- aio_context_release(ctx); - } - } - -@@ -4691,24 +4624,13 @@ cleanup: - int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - Error **errp) - { -- AioContext *ctx = bdrv_get_aio_context(bs); - BlockReopenQueue *queue; -- int ret; - - GLOBAL_STATE_CODE(); - - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } -- ret = bdrv_reopen_multiple(queue, errp); -- -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } -- -- return ret; -+ return bdrv_reopen_multiple(queue, errp); - } - - int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, -@@ -4760,7 +4682,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - const char *child_name = is_backing ? 
"backing" : "file"; - QObject *value; - const char *str; -- AioContext *ctx, *old_ctx; - bool has_child; - int ret; - -@@ -4844,13 +4765,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - bdrv_drained_begin(old_child_bs); - } - -- old_ctx = bdrv_get_aio_context(bs); -- ctx = bdrv_get_aio_context(new_child_bs); -- if (old_ctx != ctx) { -- aio_context_release(old_ctx); -- aio_context_acquire(ctx); -- } -- - bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); - -@@ -4859,11 +4773,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - - bdrv_graph_wrunlock(); - -- if (old_ctx != ctx) { -- aio_context_release(ctx); -- aio_context_acquire(old_ctx); -- } -- - if (old_child_bs) { - bdrv_drained_end(old_child_bs); - bdrv_unref(old_child_bs); -@@ -5537,7 +5446,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - int ret; - BdrvChild *child; - Transaction *tran = tran_new(); -- AioContext *old_context, *new_context = NULL; - - GLOBAL_STATE_CODE(); - -@@ -5545,21 +5453,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - assert(!bs_new->backing); - bdrv_graph_rdunlock_main_loop(); - -- old_context = bdrv_get_aio_context(bs_top); - bdrv_drained_begin(bs_top); -- -- /* -- * bdrv_drained_begin() requires that only the AioContext of the drained -- * node is locked, and at this point it can still differ from the AioContext -- * of bs_top. -- */ -- new_context = bdrv_get_aio_context(bs_new); -- aio_context_release(old_context); -- aio_context_acquire(new_context); - bdrv_drained_begin(bs_new); -- aio_context_release(new_context); -- aio_context_acquire(old_context); -- new_context = NULL; - - bdrv_graph_wrlock(); - -@@ -5571,18 +5466,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - goto out; - } - -- /* -- * bdrv_attach_child_noperm could change the AioContext of bs_top and -- * bs_new, but at least they are in the same AioContext now. 
This is the -- * AioContext that we need to lock for the rest of the function. -- */ -- new_context = bdrv_get_aio_context(bs_top); -- -- if (old_context != new_context) { -- aio_context_release(old_context); -- aio_context_acquire(new_context); -- } -- - ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); - if (ret < 0) { - goto out; -@@ -5598,11 +5481,6 @@ out: - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); - -- if (new_context && old_context != new_context) { -- aio_context_release(new_context); -- aio_context_acquire(old_context); -- } -- - return ret; - } - -@@ -5697,12 +5575,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - - GLOBAL_STATE_CODE(); - -- aio_context_release(ctx); -- aio_context_acquire(qemu_get_aio_context()); - new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, - errp); -- aio_context_release(qemu_get_aio_context()); -- aio_context_acquire(ctx); - assert(bdrv_get_aio_context(bs) == ctx); - - options = NULL; /* bdrv_new_open_driver() eats options */ -@@ -7037,12 +6911,9 @@ void bdrv_activate_all(Error **errp) - GRAPH_RDLOCK_GUARD_MAINLOOP(); - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - -- aio_context_acquire(aio_context); - ret = bdrv_activate(bs, errp); -- aio_context_release(aio_context); - if (ret < 0) { - bdrv_next_cleanup(&it); - return; -@@ -7137,20 +7008,10 @@ int bdrv_inactivate_all(void) - BlockDriverState *bs = NULL; - BdrvNextIterator it; - int ret = 0; -- GSList *aio_ctxs = NULL, *ctx; - - GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- if (!g_slist_find(aio_ctxs, aio_context)) { -- aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); -- aio_context_acquire(aio_context); -- } -- } -- - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - /* Nodes with BDS 
parents are covered by recursion from the last - * parent that gets inactivated. Don't inactivate them a second -@@ -7161,17 +7022,10 @@ int bdrv_inactivate_all(void) - ret = bdrv_inactivate_recurse(bs); - if (ret < 0) { - bdrv_next_cleanup(&it); -- goto out; -+ break; - } - } - --out: -- for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { -- AioContext *aio_context = ctx->data; -- aio_context_release(aio_context); -- } -- g_slist_free(aio_ctxs); -- - return ret; - } - -@@ -7257,11 +7111,8 @@ void bdrv_unref(BlockDriverState *bs) - static void bdrv_schedule_unref_bh(void *opaque) - { - BlockDriverState *bs = opaque; -- AioContext *ctx = bdrv_get_aio_context(bs); - -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - } - - /* -@@ -7398,8 +7249,6 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- aio_context_acquire(qemu_get_aio_context()); -- - /* Create parameter list */ - create_opts = qemu_opts_append(create_opts, drv->create_opts); - create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -@@ -7549,7 +7398,6 @@ out: - qemu_opts_del(opts); - qemu_opts_free(create_opts); - error_propagate(errp, local_err); -- aio_context_release(qemu_get_aio_context()); - } - - AioContext *bdrv_get_aio_context(BlockDriverState *bs) -@@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) - - void coroutine_fn bdrv_co_lock(BlockDriverState *bs) - { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- /* In the main thread, bs->aio_context won't change concurrently */ -- assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -- -- /* -- * We're in coroutine context, so we already hold the lock of the main -- * loop AioContext. Don't lock it twice to avoid deadlocks. 
-- */ -- assert(qemu_in_coroutine()); -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } -+ /* TODO removed in next patch */ - } - - void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) - { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- assert(qemu_in_coroutine()); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } -+ /* TODO removed in next patch */ - } - - static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) -@@ -7728,21 +7559,8 @@ static void bdrv_set_aio_context_commit(void *opaque) - BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; - BlockDriverState *bs = (BlockDriverState *) state->bs; - AioContext *new_context = state->new_ctx; -- AioContext *old_context = bdrv_get_aio_context(bs); - -- /* -- * Take the old AioContex when detaching it from bs. -- * At this point, new_context lock is already acquired, and we are now -- * also taking old_context. This is safe as long as bdrv_detach_aio_context -- * does not call AIO_POLL_WHILE(). -- */ -- if (old_context != qemu_get_aio_context()) { -- aio_context_acquire(old_context); -- } - bdrv_detach_aio_context(bs); -- if (old_context != qemu_get_aio_context()) { -- aio_context_release(old_context); -- } - bdrv_attach_aio_context(bs, new_context); - } - -@@ -7827,7 +7645,6 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - Transaction *tran; - GHashTable *visited; - int ret; -- AioContext *old_context = bdrv_get_aio_context(bs); - GLOBAL_STATE_CODE(); - - /* -@@ -7857,34 +7674,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - return -EPERM; - } - -- /* -- * Release old AioContext, it won't be needed anymore, as all -- * bdrv_drained_begin() have been called already. 
-- */ -- if (qemu_get_aio_context() != old_context) { -- aio_context_release(old_context); -- } -- -- /* -- * Acquire new AioContext since bdrv_drained_end() is going to be called -- * after we switched all nodes in the new AioContext, and the function -- * assumes that the lock of the bs is always taken. -- */ -- if (qemu_get_aio_context() != ctx) { -- aio_context_acquire(ctx); -- } -- - tran_commit(tran); -- -- if (qemu_get_aio_context() != ctx) { -- aio_context_release(ctx); -- } -- -- /* Re-acquire the old AioContext, since the caller takes and releases it. */ -- if (qemu_get_aio_context() != old_context) { -- aio_context_acquire(old_context); -- } -- - return 0; - } - -@@ -8006,7 +7796,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - const char *node_name, Error **errp) - { - BlockDriverState *to_replace_bs = bdrv_find_node(node_name); -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -8015,12 +7804,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - return NULL; - } - -- aio_context = bdrv_get_aio_context(to_replace_bs); -- aio_context_acquire(aio_context); -- - if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { -- to_replace_bs = NULL; -- goto out; -+ return NULL; - } - - /* We don't want arbitrary node of the BDS chain to be replaced only the top -@@ -8033,12 +7818,9 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - "because it cannot be guaranteed that doing so would not " - "lead to an abrupt change of visible data", - node_name, parent_bs->node_name); -- to_replace_bs = NULL; -- goto out; -+ return NULL; - } - --out: -- aio_context_release(aio_context); - return to_replace_bs; - } - -diff --git a/block/block-backend.c b/block/block-backend.c -index abac4e0235..f412bed274 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -429,7 +429,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, - { - BlockBackend *blk; - 
BlockDriverState *bs; -- AioContext *ctx; - uint64_t perm = 0; - uint64_t shared = BLK_PERM_ALL; - -@@ -459,23 +458,18 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, - shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; - } - -- aio_context_acquire(qemu_get_aio_context()); - bs = bdrv_open(filename, reference, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); - if (!bs) { - return NULL; - } - - /* bdrv_open() could have moved bs to a different AioContext */ -- ctx = bdrv_get_aio_context(bs); - blk = blk_new(bdrv_get_aio_context(bs), perm, shared); - blk->perm = perm; - blk->shared_perm = shared; - -- aio_context_acquire(ctx); - blk_insert_bs(blk, bs, errp); - bdrv_unref(bs); -- aio_context_release(ctx); - - if (!blk->root) { - blk_unref(blk); -@@ -577,13 +571,9 @@ void blk_remove_all_bs(void) - GLOBAL_STATE_CODE(); - - while ((blk = blk_all_next(blk)) != NULL) { -- AioContext *ctx = blk_get_aio_context(blk); -- -- aio_context_acquire(ctx); - if (blk->root) { - blk_remove_bs(blk); - } -- aio_context_release(ctx); - } - } - -@@ -2736,20 +2726,16 @@ int blk_commit_all(void) - GRAPH_RDLOCK_GUARD_MAINLOOP(); - - while ((blk = blk_all_next(blk)) != NULL) { -- AioContext *aio_context = blk_get_aio_context(blk); - BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); - -- aio_context_acquire(aio_context); - if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { - int ret; - - ret = bdrv_commit(unfiltered_bs); - if (ret < 0) { -- aio_context_release(aio_context); - return ret; - } - } -- aio_context_release(aio_context); - } - return 0; - } -diff --git a/block/copy-before-write.c b/block/copy-before-write.c -index 13972879b1..0842a1a6df 100644 ---- a/block/copy-before-write.c -+++ b/block/copy-before-write.c -@@ -412,7 +412,6 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - int64_t cluster_size; - g_autoptr(BlockdevOptions) full_opts = NULL; - BlockdevOptionsCbw *opts; -- 
AioContext *ctx; - int ret; - - full_opts = cbw_parse_options(options, errp); -@@ -435,15 +434,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - if (opts->bitmap) { - bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, - opts->bitmap->name, NULL, errp); - if (!bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - } - s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : -@@ -461,24 +456,21 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); - if (!s->bcs) { - error_prepend(errp, "Cannot create block-copy-state: "); -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - - cluster_size = block_copy_cluster_size(s->bcs); - - s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); - if (!s->done_bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - bdrv_disable_dirty_bitmap(s->done_bitmap); - - /* s->access_bitmap starts equal to bcs bitmap */ - s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); - if (!s->access_bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - bdrv_disable_dirty_bitmap(s->access_bitmap); - bdrv_dirty_bitmap_merge_internal(s->access_bitmap, -@@ -487,11 +479,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - - qemu_co_mutex_init(&s->lock); - QLIST_INIT(&s->frozen_read_reqs); -- -- ret = 0; --out: -- aio_context_release(ctx); -- return ret; -+ return 0; - } - - static void cbw_close(BlockDriverState *bs) -diff --git a/block/export/export.c b/block/export/export.c -index a8f274e526..6d51ae8ed7 100644 ---- a/block/export/export.c -+++ b/block/export/export.c -@@ -114,7 +114,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - } - - ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - - if 
(export->iothread) { - IOThread *iothread; -@@ -133,8 +132,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - set_context_errp = fixed_iothread ? errp : NULL; - ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp); - if (ret == 0) { -- aio_context_release(ctx); -- aio_context_acquire(new_ctx); - ctx = new_ctx; - } else if (fixed_iothread) { - goto fail; -@@ -191,8 +188,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - assert(exp->blk != NULL); - - QLIST_INSERT_HEAD(&block_exports, exp, next); -- -- aio_context_release(ctx); - return exp; - - fail: -@@ -200,7 +195,6 @@ fail: - blk_set_dev_ops(blk, NULL, NULL); - blk_unref(blk); - } -- aio_context_release(ctx); - if (exp) { - g_free(exp->id); - g_free(exp); -@@ -218,9 +212,6 @@ void blk_exp_ref(BlockExport *exp) - static void blk_exp_delete_bh(void *opaque) - { - BlockExport *exp = opaque; -- AioContext *aio_context = exp->ctx; -- -- aio_context_acquire(aio_context); - - assert(exp->refcount == 0); - QLIST_REMOVE(exp, next); -@@ -230,8 +221,6 @@ static void blk_exp_delete_bh(void *opaque) - qapi_event_send_block_export_deleted(exp->id); - g_free(exp->id); - g_free(exp); -- -- aio_context_release(aio_context); - } - - void blk_exp_unref(BlockExport *exp) -@@ -249,22 +238,16 @@ void blk_exp_unref(BlockExport *exp) - * connections and other internally held references start to shut down. When - * the function returns, there may still be active references while the export - * is in the process of shutting down. -- * -- * Acquires exp->ctx internally. Callers must *not* hold the lock. - */ - void blk_exp_request_shutdown(BlockExport *exp) - { -- AioContext *aio_context = exp->ctx; -- -- aio_context_acquire(aio_context); -- - /* - * If the user doesn't own the export any more, it is already shutting - * down. We must not call .request_shutdown and decrease the refcount a - * second time. 
- */ - if (!exp->user_owned) { -- goto out; -+ return; - } - - exp->drv->request_shutdown(exp); -@@ -272,9 +255,6 @@ void blk_exp_request_shutdown(BlockExport *exp) - assert(exp->user_owned); - exp->user_owned = false; - blk_exp_unref(exp); -- --out: -- aio_context_release(aio_context); - } - - /* -diff --git a/block/io.c b/block/io.c -index 7e62fabbf5..8fa7670571 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -294,8 +294,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - BlockDriverState *bs = data->bs; - - if (bs) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { - bdrv_do_drained_begin(bs, data->parent, data->poll); -@@ -303,7 +301,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - assert(!data->poll); - bdrv_do_drained_end(bs, data->parent); - } -- aio_context_release(ctx); - } else { - assert(data->begin); - bdrv_drain_all_begin(); -@@ -320,8 +317,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - { - BdrvCoDrainData data; - Coroutine *self = qemu_coroutine_self(); -- AioContext *ctx = bdrv_get_aio_context(bs); -- AioContext *co_ctx = qemu_coroutine_get_aio_context(self); - - /* Calling bdrv_drain() from a BH ensures the current coroutine yields and - * other coroutines run if they were queued by aio_co_enter(). */ -@@ -340,17 +335,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bdrv_inc_in_flight(bs); - } - -- /* -- * Temporarily drop the lock across yield or we would get deadlocks. -- * bdrv_co_drain_bh_cb() reaquires the lock as needed. -- * -- * When we yield below, the lock for the current context will be -- * released, so if this is actually the lock that protects bs, don't drop -- * it a second time. 
-- */ -- if (ctx != co_ctx) { -- aio_context_release(ctx); -- } - replay_bh_schedule_oneshot_event(qemu_get_aio_context(), - bdrv_co_drain_bh_cb, &data); - -@@ -358,11 +342,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - /* If we are resumed from some other event (such as an aio completion or a - * timer callback), it is a bug in the caller that should be fixed. */ - assert(data.done); -- -- /* Reacquire the AioContext of bs if we dropped it */ -- if (ctx != co_ctx) { -- aio_context_acquire(ctx); -- } - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -@@ -478,13 +457,12 @@ static bool bdrv_drain_all_poll(void) - GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- /* bdrv_drain_poll() can't make changes to the graph and we are holding the -- * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ -+ /* -+ * bdrv_drain_poll() can't make changes to the graph and we hold the BQL, -+ * so iterating bdrv_next_all_states() is safe. -+ */ - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - result |= bdrv_drain_poll(bs, NULL, true); -- aio_context_release(aio_context); - } - - return result; -@@ -525,11 +503,7 @@ void bdrv_drain_all_begin_nopoll(void) - /* Quiesce all nodes, without polling in-flight requests yet. The graph - * cannot change during this loop. 
*/ - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, NULL, false); -- aio_context_release(aio_context); - } - } - -@@ -588,11 +562,7 @@ void bdrv_drain_all_end(void) - } - - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, NULL); -- aio_context_release(aio_context); - } - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -@@ -2368,15 +2338,10 @@ int bdrv_flush_all(void) - } - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- int ret; -- -- aio_context_acquire(aio_context); -- ret = bdrv_flush(bs); -+ int ret = bdrv_flush(bs); - if (ret < 0 && !result) { - result = ret; - } -- aio_context_release(aio_context); - } - - return result; -diff --git a/block/mirror.c b/block/mirror.c -index 51f9e2f17c..5145eb53e1 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -662,7 +662,6 @@ static int mirror_exit_common(Job *job) - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockJob *bjob = &s->common; - MirrorBDSOpaque *bs_opaque; -- AioContext *replace_aio_context = NULL; - BlockDriverState *src; - BlockDriverState *target_bs; - BlockDriverState *mirror_top_bs; -@@ -677,7 +676,6 @@ static int mirror_exit_common(Job *job) - } - s->prepared = true; - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_graph_rdlock_main_loop(); - - mirror_top_bs = s->mirror_top_bs; -@@ -742,11 +740,6 @@ static int mirror_exit_common(Job *job) - } - bdrv_graph_rdunlock_main_loop(); - -- if (s->to_replace) { -- replace_aio_context = bdrv_get_aio_context(s->to_replace); -- aio_context_acquire(replace_aio_context); -- } -- - if (s->should_complete && !abort) { - BlockDriverState *to_replace = s->to_replace ?: src; - bool ro = bdrv_is_read_only(to_replace); -@@ 
-785,9 +778,6 @@ static int mirror_exit_common(Job *job) - error_free(s->replace_blocker); - bdrv_unref(s->to_replace); - } -- if (replace_aio_context) { -- aio_context_release(replace_aio_context); -- } - g_free(s->replaces); - - /* -@@ -811,8 +801,6 @@ static int mirror_exit_common(Job *job) - bdrv_unref(mirror_top_bs); - bdrv_unref(src); - -- aio_context_release(qemu_get_aio_context()); -- - return ret; - } - -@@ -1191,24 +1179,17 @@ static void mirror_complete(Job *job, Error **errp) - - /* block all operations on to_replace bs */ - if (s->replaces) { -- AioContext *replace_aio_context; -- - s->to_replace = bdrv_find_node(s->replaces); - if (!s->to_replace) { - error_setg(errp, "Node name '%s' not found", s->replaces); - return; - } - -- replace_aio_context = bdrv_get_aio_context(s->to_replace); -- aio_context_acquire(replace_aio_context); -- - /* TODO Translate this into child freeze system. */ - error_setg(&s->replace_blocker, - "block device is in use by block-job-complete"); - bdrv_op_block_all(s->to_replace, s->replace_blocker); - bdrv_ref(s->to_replace); -- -- aio_context_release(replace_aio_context); - } - - s->should_complete = true; -diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c -index 70d01a3776..a738e7bbf7 100644 ---- a/block/monitor/bitmap-qmp-cmds.c -+++ b/block/monitor/bitmap-qmp-cmds.c -@@ -95,7 +95,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -107,14 +106,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (has_granularity) { - if (granularity < 512 || !is_power_of_2(granularity)) { - error_setg(errp, "Granularity must be power of 2 " - "and at least 512"); -- goto out; -+ return; - } - } 
else { - /* Default to cluster size, if available: */ -@@ -132,12 +128,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - if (persistent && - !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) - { -- goto out; -+ return; - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- goto out; -+ return; - } - - if (disabled) { -@@ -145,9 +141,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -- --out: -- aio_context_release(aio_context); - } - - BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -@@ -157,7 +150,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -166,19 +158,14 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - return NULL; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -- aio_context_release(aio_context); - return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap) && - bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) - { -- aio_context_release(aio_context); - return NULL; - } - -@@ -190,7 +177,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - *bitmap_bs = bs; - } - -- aio_context_release(aio_context); - return release ? 
NULL : bitmap; - } - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index c729cbf1eb..bdbb5cb141 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -141,7 +141,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - const char *id = qdict_get_str(qdict, "id"); - BlockBackend *blk; - BlockDriverState *bs; -- AioContext *aio_context; - Error *local_err = NULL; - - GLOBAL_STATE_CODE(); -@@ -168,14 +167,10 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - return; - } - -- aio_context = blk_get_aio_context(blk); -- aio_context_acquire(aio_context); -- - bs = blk_bs(blk); - if (bs) { - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { - error_report_err(local_err); -- aio_context_release(aio_context); - return; - } - -@@ -196,8 +191,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - } else { - blk_unref(blk); - } -- -- aio_context_release(aio_context); - } - - void hmp_commit(Monitor *mon, const QDict *qdict) -@@ -213,7 +206,6 @@ void hmp_commit(Monitor *mon, const QDict *qdict) - ret = blk_commit_all(); - } else { - BlockDriverState *bs; -- AioContext *aio_context; - - blk = blk_by_name(device); - if (!blk) { -@@ -222,18 +214,13 @@ void hmp_commit(Monitor *mon, const QDict *qdict) - } - - bs = bdrv_skip_implicit_filters(blk_bs(blk)); -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!blk_is_available(blk)) { - error_report("Device '%s' has no medium", device); -- aio_context_release(aio_context); - return; - } - - ret = bdrv_commit(bs); -- -- aio_context_release(aio_context); - } - if (ret < 0) { - error_report("'commit' error for '%s': %s", device, strerror(-ret)); -@@ -560,7 +547,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - BlockBackend *blk = NULL; - BlockDriverState *bs = NULL; - BlockBackend *local_blk = NULL; -- AioContext *ctx = NULL; - bool qdev = qdict_get_try_bool(qdict, "qdev", false); - const char 
*device = qdict_get_str(qdict, "device"); - const char *command = qdict_get_str(qdict, "command"); -@@ -582,9 +568,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - } - } - -- ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - if (bs) { - blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); - ret = blk_insert_bs(blk, bs, &err); -@@ -622,11 +605,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - - fail: - blk_unref(local_blk); -- -- if (ctx) { -- aio_context_release(ctx); -- } -- - hmp_handle_error(mon, err); - } - -@@ -882,7 +860,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - int nb_sns, i; - int total; - int *global_snapshots; -- AioContext *aio_context; - - typedef struct SnapshotEntry { - QEMUSnapshotInfo sn; -@@ -909,11 +886,8 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - error_report_err(err); - return; - } -- aio_context = bdrv_get_aio_context(bs); - -- aio_context_acquire(aio_context); - nb_sns = bdrv_snapshot_list(bs, &sn_tab); -- aio_context_release(aio_context); - - if (nb_sns < 0) { - monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); -@@ -924,9 +898,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - int bs1_nb_sns = 0; - ImageEntry *ie; - SnapshotEntry *se; -- AioContext *ctx = bdrv_get_aio_context(bs1); - -- aio_context_acquire(ctx); - if (bdrv_can_snapshot(bs1)) { - sn = NULL; - bs1_nb_sns = bdrv_snapshot_list(bs1, &sn); -@@ -944,7 +916,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - } - g_free(sn); - } -- aio_context_release(ctx); - } - - if (no_snapshot) { -diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c -index 1618cd225a..e4282631d2 100644 ---- a/block/qapi-sysemu.c -+++ b/block/qapi-sysemu.c -@@ -174,7 +174,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - { - BlockBackend *blk; - BlockDriverState *bs; -- AioContext *aio_context; - bool 
has_attached_device; - - GLOBAL_STATE_CODE(); -@@ -204,13 +203,10 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_graph_rdlock_main_loop(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { - bdrv_graph_rdunlock_main_loop(); -- goto out; -+ return; - } - bdrv_graph_rdunlock_main_loop(); - -@@ -223,9 +219,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false, &error_abort); - } -- --out: -- aio_context_release(aio_context); - } - - void qmp_blockdev_remove_medium(const char *id, Error **errp) -@@ -237,7 +230,6 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, - BlockDriverState *bs, Error **errp) - { - Error *local_err = NULL; -- AioContext *ctx; - bool has_device; - int ret; - -@@ -259,11 +251,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, - return; - } - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - ret = blk_insert_bs(blk, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - return; - } -@@ -374,9 +362,7 @@ void qmp_blockdev_change_medium(const char *device, - qdict_put_str(options, "driver", format); - } - -- aio_context_acquire(qemu_get_aio_context()); - medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); -- aio_context_release(qemu_get_aio_context()); - - if (!medium_bs) { - goto fail; -@@ -437,20 +423,16 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - ThrottleConfig cfg; - BlockDriverState *bs; - BlockBackend *blk; -- AioContext *aio_context; - - blk = qmp_get_blk(arg->device, arg->id, errp); - if (!blk) { - return; - } - -- aio_context = blk_get_aio_context(blk); -- aio_context_acquire(aio_context); -- - bs = blk_bs(blk); - if (!bs) { - error_setg(errp, "Device has no medium"); -- goto out; -+ return; - } - - 
throttle_config_init(&cfg); -@@ -505,7 +487,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - } - - if (!throttle_is_valid(&cfg, errp)) { -- goto out; -+ return; - } - - if (throttle_enabled(&cfg)) { -@@ -522,9 +504,6 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - /* If all throttling settings are set to 0, disable I/O limits */ - blk_io_limits_disable(blk); - } -- --out: -- aio_context_release(aio_context); - } - - void qmp_block_latency_histogram_set( -diff --git a/block/qapi.c b/block/qapi.c -index 82a30b38fe..9e806fa230 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -234,13 +234,11 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - int ret; - Error *err = NULL; - -- aio_context_acquire(bdrv_get_aio_context(bs)); -- - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "Can't get image size '%s'", - bs->exact_filename); -- goto out; -+ return; - } - - bdrv_refresh_filename(bs); -@@ -265,7 +263,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - info->format_specific = bdrv_get_specific_info(bs, &err); - if (err) { - error_propagate(errp, err); -- goto out; -+ return; - } - backing_filename = bs->backing_file; - if (backing_filename[0] != '\0') { -@@ -300,11 +298,8 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - break; - default: - error_propagate(errp, err); -- goto out; -+ return; - } -- --out: -- aio_context_release(bdrv_get_aio_context(bs)); - } - - /** -@@ -709,15 +704,10 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - /* Just to be safe if query_nodes is not always initialized */ - if (has_query_nodes && query_nodes) { - for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); - QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false)); -- aio_context_release(ctx); - } - } 
else { - for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { -- AioContext *ctx = blk_get_aio_context(blk); - BlockStats *s; - char *qdev; - -@@ -725,7 +715,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - continue; - } - -- aio_context_acquire(ctx); - s = bdrv_query_bds_stats(blk_bs(blk), true); - s->device = g_strdup(blk_name(blk)); - -@@ -737,7 +726,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - } - - bdrv_query_blk_stats(s->stats, blk); -- aio_context_release(ctx); - - QAPI_LIST_APPEND(tail, s); - } -diff --git a/block/raw-format.c b/block/raw-format.c -index 1111dffd54..ac7e8495f6 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -470,7 +470,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) - { - BDRVRawState *s = bs->opaque; -- AioContext *ctx; - bool has_size; - uint64_t offset, size; - BdrvChildRole file_role; -@@ -522,11 +521,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, - bs->file->bs->filename); - } - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - ret = raw_apply_options(bs, s, offset, has_size, size, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - return ret; - } -diff --git a/block/replication.c b/block/replication.c -index 424b537ff7..ca6bd0a720 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -394,14 +394,7 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - } - - if (reopen_queue) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } - bdrv_reopen_multiple(reopen_queue, errp); -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } - } - } - -@@ -462,14 +455,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - BlockDriverState *top_bs; - BdrvChild *active_disk, *hidden_disk, *secondary_disk; - int64_t active_length, hidden_length, disk_length; 
-- AioContext *aio_context; - Error *local_err = NULL; - BackupPerf perf = { .use_copy_range = true, .max_workers = 1 }; - - GLOBAL_STATE_CODE(); - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || -@@ -479,20 +469,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->stage != BLOCK_REPLICATION_NONE) { - error_setg(errp, "Block replication is running or done"); -- aio_context_release(aio_context); - return; - } - - if (s->mode != mode) { - error_setg(errp, "The parameter mode's value is invalid, needs %d," - " but got %d", s->mode, mode); -- aio_context_release(aio_context); - return; - } - -@@ -505,7 +492,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { - error_setg(errp, "Active disk doesn't have backing file"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - -@@ -513,7 +499,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!hidden_disk->bs || !hidden_disk->bs->backing) { - error_setg(errp, "Hidden disk doesn't have backing file"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - -@@ -521,7 +506,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { - error_setg(errp, "The secondary disk doesn't have block backend"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - bdrv_graph_rdunlock_main_loop(); -@@ -534,7 +518,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - active_length != hidden_length || 
hidden_length != disk_length) { - error_setg(errp, "Active disk, hidden disk, secondary disk's length" - " are not the same"); -- aio_context_release(aio_context); - return; - } - -@@ -546,7 +529,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - !hidden_disk->bs->drv->bdrv_make_empty) { - error_setg(errp, - "Active disk or hidden disk doesn't support make_empty"); -- aio_context_release(aio_context); - bdrv_graph_rdunlock_main_loop(); - return; - } -@@ -556,7 +538,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - reopen_backing_file(bs, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- aio_context_release(aio_context); - return; - } - -@@ -569,7 +550,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -- aio_context_release(aio_context); - return; - } - -@@ -580,7 +560,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -- aio_context_release(aio_context); - return; - } - -@@ -594,7 +573,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - error_setg(errp, "No top_bs or it is invalid"); - bdrv_graph_wrunlock(); - reopen_backing_file(bs, false, NULL); -- aio_context_release(aio_context); - return; - } - bdrv_op_block_all(top_bs, s->blocker); -@@ -612,13 +590,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - backup_job_cleanup(bs); -- aio_context_release(aio_context); - return; - } - job_start(&s->backup_job->job); - break; - default: -- aio_context_release(aio_context); - abort(); - } - -@@ -629,18 +605,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - } - - s->error = 0; -- aio_context_release(aio_context); - } - - static 
void replication_do_checkpoint(ReplicationState *rs, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || - s->stage == BLOCK_REPLICATION_FAILOVER) { -@@ -649,38 +619,28 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(bs, errp); - } -- aio_context_release(aio_context); - } - - static void replication_get_error(ReplicationState *rs, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_NONE) { - error_setg(errp, "Block replication is not running"); -- aio_context_release(aio_context); - return; - } - - if (s->error) { - error_setg(errp, "I/O error occurred"); -- aio_context_release(aio_context); - return; - } -- aio_context_release(aio_context); - } - - static void replication_done(void *opaque, int ret) -@@ -708,12 +668,7 @@ static void replication_done(void *opaque, int ret) - static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || - s->stage == BLOCK_REPLICATION_FAILOVER) { -@@ -722,13 +677,11 @@ static void 
replication_stop(ReplicationState *rs, bool failover, Error **errp) - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->stage != BLOCK_REPLICATION_RUNNING) { - error_setg(errp, "Block replication is not running"); -- aio_context_release(aio_context); - return; - } - -@@ -744,15 +697,12 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - * disk, secondary disk in backup_job_completed(). - */ - if (s->backup_job) { -- aio_context_release(aio_context); - job_cancel_sync(&s->backup_job->job, true); -- aio_context_acquire(aio_context); - } - - if (!failover) { - secondary_do_checkpoint(bs, errp); - s->stage = BLOCK_REPLICATION_DONE; -- aio_context_release(aio_context); - return; - } - -@@ -765,10 +715,8 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - bdrv_graph_rdunlock_main_loop(); - break; - default: -- aio_context_release(aio_context); - abort(); - } -- aio_context_release(aio_context); - } - - static const char *const replication_strong_runtime_opts[] = { -diff --git a/block/snapshot.c b/block/snapshot.c -index e486d3e205..a28f2b039f 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -525,9 +525,7 @@ static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs) - return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); - } - --/* Group operations. All block drivers are involved. -- * These functions will properly handle dataplane (take aio_context_acquire -- * when appropriate for appropriate block drivers) */ -+/* Group operations. All block drivers are involved. 
*/ - - bool bdrv_all_can_snapshot(bool has_devices, strList *devices, - Error **errp) -@@ -545,14 +543,11 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool ok = true; - -- aio_context_acquire(ctx); - if (devices || bdrv_all_snapshots_includes_bs(bs)) { - ok = bdrv_can_snapshot(bs); - } -- aio_context_release(ctx); - if (!ok) { - error_setg(errp, "Device '%s' is writable but does not support " - "snapshots", bdrv_get_device_or_node_name(bs)); -@@ -582,18 +577,15 @@ int bdrv_all_delete_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - QEMUSnapshotInfo sn1, *snapshot = &sn1; - int ret = 0; - -- aio_context_acquire(ctx); - if ((devices || bdrv_all_snapshots_includes_bs(bs)) && - bdrv_snapshot_find(bs, snapshot, name) >= 0) - { - ret = bdrv_snapshot_delete(bs, snapshot->id_str, - snapshot->name, errp); - } -- aio_context_release(ctx); - if (ret < 0) { - error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", - name, bdrv_get_device_or_node_name(bs)); -@@ -628,17 +620,14 @@ int bdrv_all_goto_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool all_snapshots_includes_bs; - -- aio_context_acquire(ctx); - bdrv_graph_rdlock_main_loop(); - all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs); - bdrv_graph_rdunlock_main_loop(); - - ret = (devices || all_snapshots_includes_bs) ? 
- bdrv_snapshot_goto(bs, name, errp) : 0; -- aio_context_release(ctx); - if (ret < 0) { - bdrv_graph_rdlock_main_loop(); - error_prepend(errp, "Could not load snapshot '%s' on '%s': ", -@@ -670,15 +659,12 @@ int bdrv_all_has_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - QEMUSnapshotInfo sn; - int ret = 0; - -- aio_context_acquire(ctx); - if (devices || bdrv_all_snapshots_includes_bs(bs)) { - ret = bdrv_snapshot_find(bs, &sn, name); - } -- aio_context_release(ctx); - if (ret < 0) { - if (ret == -ENOENT) { - return 0; -@@ -715,10 +701,8 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - int ret = 0; - -- aio_context_acquire(ctx); - if (bs == vm_state_bs) { - sn->vm_state_size = vm_state_size; - ret = bdrv_snapshot_create(bs, sn); -@@ -726,7 +710,6 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, - sn->vm_state_size = 0; - ret = bdrv_snapshot_create(bs, sn); - } -- aio_context_release(ctx); - if (ret < 0) { - error_setg(errp, "Could not create snapshot '%s' on '%s'", - sn->name, bdrv_get_device_or_node_name(bs)); -@@ -757,13 +740,10 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool found = false; - -- aio_context_acquire(ctx); - found = (devices || bdrv_all_snapshots_includes_bs(bs)) && - bdrv_can_snapshot(bs); -- aio_context_release(ctx); - - if (vmstate_bs) { - if (g_str_equal(vmstate_bs, -diff --git a/block/write-threshold.c b/block/write-threshold.c -index 76d8885677..56fe88de81 100644 ---- a/block/write-threshold.c -+++ b/block/write-threshold.c -@@ -33,7 +33,6 @@ void qmp_block_set_write_threshold(const char *node_name, - Error **errp) - { - 
BlockDriverState *bs; -- AioContext *aio_context; - - bs = bdrv_find_node(node_name); - if (!bs) { -@@ -41,12 +40,7 @@ void qmp_block_set_write_threshold(const char *node_name, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_write_threshold_set(bs, threshold_bytes); -- -- aio_context_release(aio_context); - } - - void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, -diff --git a/blockdev.c b/blockdev.c -index 9e1381169d..5d8b3a23eb 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -662,7 +662,6 @@ err_no_opts: - /* Takes the ownership of bs_opts */ - BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) - { -- BlockDriverState *bs; - int bdrv_flags = 0; - - GLOBAL_STATE_CODE(); -@@ -677,11 +676,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) - bdrv_flags |= BDRV_O_INACTIVE; - } - -- aio_context_acquire(qemu_get_aio_context()); -- bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); -- aio_context_release(qemu_get_aio_context()); -- -- return bs; -+ return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); - } - - void blockdev_close_all_bdrv_states(void) -@@ -690,11 +685,7 @@ void blockdev_close_all_bdrv_states(void) - - GLOBAL_STATE_CODE(); - QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - } - } - -@@ -1048,7 +1039,6 @@ fail: - static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) - { - BlockDriverState *bs; -- AioContext *aio_context; - - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -@@ -1062,16 +1052,11 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) - return NULL; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (!bdrv_is_inserted(bs)) { - error_setg(errp, "Device has no medium"); - bs = NULL; - } - -- 
aio_context_release(aio_context); -- - return bs; - } - -@@ -1141,7 +1126,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - Error **errp) - { - BlockDriverState *bs; -- AioContext *aio_context; - QEMUSnapshotInfo sn; - Error *local_err = NULL; - SnapshotInfo *info = NULL; -@@ -1154,39 +1138,35 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - if (!bs) { - return NULL; - } -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!id && !name) { - error_setg(errp, "Name or id must be provided"); -- goto out_aio_context; -+ return NULL; - } - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { -- goto out_aio_context; -+ return NULL; - } - - ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out_aio_context; -+ return NULL; - } - if (!ret) { - error_setg(errp, - "Snapshot with id '%s' and name '%s' does not exist on " - "device '%s'", - STR_OR_NULL(id), STR_OR_NULL(name), device); -- goto out_aio_context; -+ return NULL; - } - - bdrv_snapshot_delete(bs, id, name, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out_aio_context; -+ return NULL; - } - -- aio_context_release(aio_context); -- - info = g_new0(SnapshotInfo, 1); - info->id = g_strdup(sn.id_str); - info->name = g_strdup(sn.name); -@@ -1201,10 +1181,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - } - - return info; -- --out_aio_context: -- aio_context_release(aio_context); -- return NULL; - } - - /* internal snapshot private data */ -@@ -1232,7 +1208,6 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - bool ret; - int64_t rt; - InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); -- AioContext *aio_context; - int ret1; - - GLOBAL_STATE_CODE(); -@@ -1248,33 +1223,30 @@ static void 
internal_snapshot_action(BlockdevSnapshotInternal *internal, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - state->bs = bs; - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { -- goto out; -+ return; - } - - if (bdrv_is_read_only(bs)) { - error_setg(errp, "Device '%s' is read only", device); -- goto out; -+ return; - } - - if (!bdrv_can_snapshot(bs)) { - error_setg(errp, "Block format '%s' used by device '%s' " - "does not support internal snapshots", - bs->drv->format_name, device); -- goto out; -+ return; - } - - if (!strlen(name)) { - error_setg(errp, "Name is empty"); -- goto out; -+ return; - } - - /* check whether a snapshot with name exist */ -@@ -1282,12 +1254,12 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } else if (ret) { - error_setg(errp, - "Snapshot with name '%s' already exists on device '%s'", - name, device); -- goto out; -+ return; - } - - /* 3. take the snapshot */ -@@ -1308,14 +1280,11 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - error_setg_errno(errp, -ret1, - "Failed to create snapshot '%s' on device '%s'", - name, device); -- goto out; -+ return; - } - - /* 4. 
succeed, mark a snapshot is created */ - state->created = true; -- --out: -- aio_context_release(aio_context); - } - - static void internal_snapshot_abort(void *opaque) -@@ -1323,7 +1292,6 @@ static void internal_snapshot_abort(void *opaque) - InternalSnapshotState *state = opaque; - BlockDriverState *bs = state->bs; - QEMUSnapshotInfo *sn = &state->sn; -- AioContext *aio_context; - Error *local_error = NULL; - - GLOBAL_STATE_CODE(); -@@ -1333,9 +1301,6 @@ static void internal_snapshot_abort(void *opaque) - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { - error_reportf_err(local_error, - "Failed to delete snapshot with id '%s' and " -@@ -1343,25 +1308,17 @@ static void internal_snapshot_abort(void *opaque) - sn->id_str, sn->name, - bdrv_get_device_name(bs)); - } -- -- aio_context_release(aio_context); - } - - static void internal_snapshot_clean(void *opaque) - { - g_autofree InternalSnapshotState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - /* external snapshot private data */ -@@ -1395,7 +1352,6 @@ static void external_snapshot_action(TransactionAction *action, - /* File name of the new image (for 'blockdev-snapshot-sync') */ - const char *new_image_file; - ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); -- AioContext *aio_context; - uint64_t perm, shared; - - /* TODO We'll eventually have to take a writer lock in this function */ -@@ -1435,26 +1391,23 @@ static void external_snapshot_action(TransactionAction *action, - return; - } - -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); -- - /* Paired with .clean() */ - bdrv_drained_begin(state->old_bs); - - if 
(!bdrv_is_inserted(state->old_bs)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); -- goto out; -+ return; - } - - if (bdrv_op_is_blocked(state->old_bs, - BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) { -- goto out; -+ return; - } - - if (!bdrv_is_read_only(state->old_bs)) { - if (bdrv_flush(state->old_bs)) { - error_setg(errp, QERR_IO_ERROR); -- goto out; -+ return; - } - } - -@@ -1466,13 +1419,13 @@ static void external_snapshot_action(TransactionAction *action, - - if (node_name && !snapshot_node_name) { - error_setg(errp, "New overlay node-name missing"); -- goto out; -+ return; - } - - if (snapshot_node_name && - bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { - error_setg(errp, "New overlay node-name already in use"); -- goto out; -+ return; - } - - flags = state->old_bs->open_flags; -@@ -1485,20 +1438,18 @@ static void external_snapshot_action(TransactionAction *action, - int64_t size = bdrv_getlength(state->old_bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -+ return; - } - bdrv_refresh_filename(state->old_bs); - -- aio_context_release(aio_context); - bdrv_img_create(new_image_file, format, - state->old_bs->filename, - state->old_bs->drv->format_name, - NULL, size, flags, false, &local_err); -- aio_context_acquire(aio_context); - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - } - -@@ -1508,20 +1459,15 @@ static void external_snapshot_action(TransactionAction *action, - } - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - -- aio_context_acquire(qemu_get_aio_context()); - state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, - errp); -- aio_context_release(qemu_get_aio_context()); - - /* We will manually add the backing_hd field to the bs later */ - if (!state->new_bs) { - return; - } - -- aio_context_acquire(aio_context); -- - /* - * Allow attaching a backing file to an overlay that's already in use 
only - * if the parents don't assume that they are already seeing a valid image. -@@ -1530,41 +1476,34 @@ static void external_snapshot_action(TransactionAction *action, - bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); - if (perm & BLK_PERM_CONSISTENT_READ) { - error_setg(errp, "The overlay is already in use"); -- goto out; -+ return; - } - - if (state->new_bs->drv->is_filter) { - error_setg(errp, "Filters cannot be used as overlays"); -- goto out; -+ return; - } - - if (bdrv_cow_child(state->new_bs)) { - error_setg(errp, "The overlay already has a backing image"); -- goto out; -+ return; - } - - if (!state->new_bs->drv->supports_backing) { - error_setg(errp, "The overlay does not support backing images"); -- goto out; -+ return; - } - - ret = bdrv_append(state->new_bs, state->old_bs, errp); - if (ret < 0) { -- goto out; -+ return; - } - state->overlay_appended = true; -- --out: -- aio_context_release(aio_context); - } - - static void external_snapshot_commit(void *opaque) - { - ExternalSnapshotState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); - - /* We don't need (or want) to use the transactional - * bdrv_reopen_multiple() across all the entries at once, because we -@@ -1572,8 +1511,6 @@ static void external_snapshot_commit(void *opaque) - if (!qatomic_read(&state->old_bs->copy_on_read)) { - bdrv_reopen_set_read_only(state->old_bs, true, NULL); - } -- -- aio_context_release(aio_context); - } - - static void external_snapshot_abort(void *opaque) -@@ -1586,7 +1523,6 @@ static void external_snapshot_abort(void *opaque) - int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); - - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ -@@ -1599,15 +1535,9 @@ static void external_snapshot_abort(void *opaque) - */ - tmp_context = bdrv_get_aio_context(state->old_bs); - if 
(aio_context != tmp_context) { -- aio_context_release(aio_context); -- aio_context_acquire(tmp_context); -- - ret = bdrv_try_change_aio_context(state->old_bs, - aio_context, NULL, NULL); - assert(ret == 0); -- -- aio_context_release(tmp_context); -- aio_context_acquire(aio_context); - } - - bdrv_drained_begin(state->new_bs); -@@ -1617,8 +1547,6 @@ static void external_snapshot_abort(void *opaque) - bdrv_drained_end(state->new_bs); - - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ -- -- aio_context_release(aio_context); - } - } - } -@@ -1626,19 +1554,13 @@ static void external_snapshot_abort(void *opaque) - static void external_snapshot_clean(void *opaque) - { - g_autofree ExternalSnapshotState *state = opaque; -- AioContext *aio_context; - - if (!state->old_bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->old_bs); - bdrv_unref(state->new_bs); -- -- aio_context_release(aio_context); - } - - typedef struct DriveBackupState { -@@ -1670,7 +1592,6 @@ static void drive_backup_action(DriveBackup *backup, - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -- AioContext *old_context; - const char *format; - QDict *options; - Error *local_err = NULL; -@@ -1698,7 +1619,6 @@ static void drive_backup_action(DriveBackup *backup, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - state->bs = bs; - /* Paired with .clean() */ -@@ -1713,7 +1633,7 @@ static void drive_backup_action(DriveBackup *backup, - bdrv_graph_rdlock_main_loop(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { - bdrv_graph_rdunlock_main_loop(); -- goto out; -+ return; - } - - flags = bs->open_flags | BDRV_O_RDWR; -@@ -1744,7 +1664,7 @@ static void drive_backup_action(DriveBackup *backup, - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; 
-+ return; - } - - if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -@@ -1770,7 +1690,7 @@ static void drive_backup_action(DriveBackup *backup, - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - options = qdict_new(); -@@ -1779,30 +1699,18 @@ static void drive_backup_action(DriveBackup *backup, - if (format) { - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - -- aio_context_acquire(qemu_get_aio_context()); - target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); -- - if (!target_bs) { - return; - } - -- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); -- - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - if (set_backing_hd) { - if (bdrv_set_backing_hd(target_bs, source, errp) < 0) { - goto unref; -@@ -1815,22 +1723,14 @@ static void drive_backup_action(DriveBackup *backup, - - unref: - bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); - } - - static void drive_backup_commit(void *opaque) - { - DriveBackupState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); - - assert(state->job); - job_start(&state->job->job); -- -- aio_context_release(aio_context); - } - - static void drive_backup_abort(void *opaque) -@@ -1845,18 +1745,12 @@ static void drive_backup_abort(void *opaque) - static void drive_backup_clean(void *opaque) - { - g_autofree DriveBackupState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - 
bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - typedef struct BlockdevBackupState { -@@ -1881,7 +1775,6 @@ static void blockdev_backup_action(BlockdevBackup *backup, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -- AioContext *old_context; - int ret; - - tran_add(tran, &blockdev_backup_drv, state); -@@ -1898,17 +1791,12 @@ static void blockdev_backup_action(BlockdevBackup *backup, - - /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ - aio_context = bdrv_get_aio_context(bs); -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); - - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); - state->bs = bs; - - /* Paired with .clean() */ -@@ -1917,22 +1805,14 @@ static void blockdev_backup_action(BlockdevBackup *backup, - state->job = do_backup_common(qapi_BlockdevBackup_base(backup), - bs, target_bs, aio_context, - block_job_txn, errp); -- -- aio_context_release(aio_context); - } - - static void blockdev_backup_commit(void *opaque) - { - BlockdevBackupState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); - - assert(state->job); - job_start(&state->job->job); -- -- aio_context_release(aio_context); - } - - static void blockdev_backup_abort(void *opaque) -@@ -1947,18 +1827,12 @@ static void blockdev_backup_abort(void *opaque) - static void blockdev_backup_clean(void *opaque) - { - g_autofree BlockdevBackupState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - typedef struct BlockDirtyBitmapState { -@@ 
-2454,7 +2328,6 @@ void qmp_block_stream(const char *job_id, const char *device, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - bdrv_graph_rdlock_main_loop(); - if (base) { -@@ -2521,7 +2394,7 @@ void qmp_block_stream(const char *job_id, const char *device, - if (!base_bs && backing_file) { - error_setg(errp, "backing file specified, but streaming the " - "entire chain"); -- goto out; -+ return; - } - - if (has_auto_finalize && !auto_finalize) { -@@ -2536,18 +2409,14 @@ void qmp_block_stream(const char *job_id, const char *device, - filter_node_name, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - trace_qmp_block_stream(bs); -- --out: -- aio_context_release(aio_context); - return; - - out_rdlock: - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - } - - void qmp_block_commit(const char *job_id, const char *device, -@@ -2606,10 +2475,9 @@ void qmp_block_commit(const char *job_id, const char *device, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { -- goto out; -+ return; - } - - /* default top_bs is the active layer */ -@@ -2617,16 +2485,16 @@ void qmp_block_commit(const char *job_id, const char *device, - - if (top_node && top) { - error_setg(errp, "'top-node' and 'top' are mutually exclusive"); -- goto out; -+ return; - } else if (top_node) { - top_bs = bdrv_lookup_bs(NULL, top_node, errp); - if (top_bs == NULL) { -- goto out; -+ return; - } - if (!bdrv_chain_contains(bs, top_bs)) { - error_setg(errp, "'%s' is not in this backing file chain", - top_node); -- goto out; -+ return; - } - } else if (top) { - /* This strcmp() is just a shortcut, there is no need to -@@ -2640,35 +2508,35 @@ void qmp_block_commit(const char *job_id, const char *device, - - if (top_bs == NULL) { - error_setg(errp, "Top image file %s not found", top ? 
top : "NULL"); -- goto out; -+ return; - } - - assert(bdrv_get_aio_context(top_bs) == aio_context); - - if (base_node && base) { - error_setg(errp, "'base-node' and 'base' are mutually exclusive"); -- goto out; -+ return; - } else if (base_node) { - base_bs = bdrv_lookup_bs(NULL, base_node, errp); - if (base_bs == NULL) { -- goto out; -+ return; - } - if (!bdrv_chain_contains(top_bs, base_bs)) { - error_setg(errp, "'%s' is not in this backing file chain", - base_node); -- goto out; -+ return; - } - } else if (base) { - base_bs = bdrv_find_backing_image(top_bs, base); - if (base_bs == NULL) { - error_setg(errp, "Can't find '%s' in the backing chain", base); -- goto out; -+ return; - } - } else { - base_bs = bdrv_find_base(top_bs); - if (base_bs == NULL) { - error_setg(errp, "There is no backimg image"); -- goto out; -+ return; - } - } - -@@ -2678,14 +2546,14 @@ void qmp_block_commit(const char *job_id, const char *device, - iter = bdrv_filter_or_cow_bs(iter)) - { - if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { -- goto out; -+ return; - } - } - - /* Do not allow attempts to commit an image into itself */ - if (top_bs == base_bs) { - error_setg(errp, "cannot commit an image into itself"); -- goto out; -+ return; - } - - /* -@@ -2708,7 +2576,7 @@ void qmp_block_commit(const char *job_id, const char *device, - error_setg(errp, "'backing-file' specified, but 'top' has a " - "writer on it"); - } -- goto out; -+ return; - } - if (!job_id) { - /* -@@ -2724,7 +2592,7 @@ void qmp_block_commit(const char *job_id, const char *device, - } else { - BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); - if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { -- goto out; -+ return; - } - commit_start(job_id, bs, base_bs, top_bs, job_flags, - speed, on_error, backing_file, -@@ -2732,11 +2600,8 @@ void qmp_block_commit(const char *job_id, const char *device, - } - if (local_err != NULL) { - error_propagate(errp, local_err); -- goto out; 
-+ return; - } -- --out: -- aio_context_release(aio_context); - } - - /* Common QMP interface for drive-backup and blockdev-backup */ -@@ -2985,8 +2850,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - - if (replaces) { - BlockDriverState *to_replace_bs; -- AioContext *aio_context; -- AioContext *replace_aio_context; - int64_t bs_size, replace_size; - - bs_size = bdrv_getlength(bs); -@@ -3000,19 +2863,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- replace_aio_context = bdrv_get_aio_context(to_replace_bs); -- /* -- * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not -- * to acquire the same AioContext twice. -- */ -- if (replace_aio_context != aio_context) { -- aio_context_acquire(replace_aio_context); -- } - replace_size = bdrv_getlength(to_replace_bs); -- if (replace_aio_context != aio_context) { -- aio_context_release(replace_aio_context); -- } - - if (replace_size < 0) { - error_setg_errno(errp, -replace_size, -@@ -3041,7 +2892,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_backing_bs, *target_bs; - AioContext *aio_context; -- AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3064,7 +2914,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!arg->has_mode) { - arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -@@ -3088,14 +2937,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -+ return; - } - - if (arg->replaces) { - if (!arg->node_name) { - error_setg(errp, "a node-name must be provided when replacing a" - " named node of the graph"); -- goto out; -+ return; - } - 
} - -@@ -3143,7 +2992,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - options = qdict_new(); -@@ -3153,15 +3002,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - if (format) { - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - - /* Mirroring takes care of copy-on-write using the source's backing - * file. - */ -- aio_context_acquire(qemu_get_aio_context()); - target_bs = bdrv_open(arg->target, NULL, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); -- - if (!target_bs) { - return; - } -@@ -3173,20 +3018,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - bdrv_graph_rdunlock_main_loop(); - - -- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); -- - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - blockdev_mirror_common(arg->job_id, bs, target_bs, - arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3202,8 +3039,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - arg->has_auto_dismiss, arg->auto_dismiss, - errp); - bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); - } - - void qmp_blockdev_mirror(const char *job_id, -@@ -3226,7 +3061,6 @@ void qmp_blockdev_mirror(const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -- AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - bool zero_target; - int ret; -@@ -3243,18 +3077,11 @@ void qmp_blockdev_mirror(const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -- /* Honor bdrv_try_change_aio_context() 
context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(old_context); - - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - if (ret < 0) { -- goto out; -+ return; - } - - blockdev_mirror_common(job_id, bs, target_bs, -@@ -3269,8 +3096,6 @@ void qmp_blockdev_mirror(const char *job_id, - has_auto_finalize, auto_finalize, - has_auto_dismiss, auto_dismiss, - errp); --out: -- aio_context_release(aio_context); - } - - /* -@@ -3433,7 +3258,6 @@ void qmp_change_backing_file(const char *device, - Error **errp) - { - BlockDriverState *bs = NULL; -- AioContext *aio_context; - BlockDriverState *image_bs = NULL; - Error *local_err = NULL; - bool ro; -@@ -3444,9 +3268,6 @@ void qmp_change_backing_file(const char *device, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_graph_rdlock_main_loop(); - - image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); -@@ -3485,7 +3306,7 @@ void qmp_change_backing_file(const char *device, - - if (ro) { - if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { -- goto out; -+ return; - } - } - -@@ -3503,14 +3324,10 @@ void qmp_change_backing_file(const char *device, - if (ro) { - bdrv_reopen_set_read_only(image_bs, true, errp); - } -- --out: -- aio_context_release(aio_context); - return; - - out_rdlock: - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - } - - void qmp_blockdev_add(BlockdevOptions *options, Error **errp) -@@ -3550,7 +3367,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - for (; reopen_list != NULL; reopen_list = reopen_list->next) { - BlockdevOptions *options = reopen_list->value; - BlockDriverState *bs; -- AioContext *ctx; - QObject *obj; - Visitor *v; - QDict *qdict; -@@ -3578,12 +3394,7 @@ void 
qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - qdict_flatten(qdict); - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - queue = bdrv_reopen_queue(queue, bs, qdict, false); -- -- aio_context_release(ctx); - } - - /* Perform the reopen operation */ -@@ -3596,7 +3407,6 @@ fail: - - void qmp_blockdev_del(const char *node_name, Error **errp) - { -- AioContext *aio_context; - BlockDriverState *bs; - - GLOBAL_STATE_CODE(); -@@ -3611,30 +3421,25 @@ void qmp_blockdev_del(const char *node_name, Error **errp) - error_setg(errp, "Node %s is in use", node_name); - return; - } -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { -- goto out; -+ return; - } - - if (!QTAILQ_IN_USE(bs, monitor_list)) { - error_setg(errp, "Node %s is not owned by the monitor", - bs->node_name); -- goto out; -+ return; - } - - if (bs->refcnt > 1) { - error_setg(errp, "Block device %s is in use", - bdrv_get_device_or_node_name(bs)); -- goto out; -+ return; - } - - QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); - bdrv_unref(bs); -- --out: -- aio_context_release(aio_context); - } - - static BdrvChild * GRAPH_RDLOCK -@@ -3724,7 +3529,6 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) - void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - bool has_force, bool force, Error **errp) - { -- AioContext *old_context; - AioContext *new_context; - BlockDriverState *bs; - -@@ -3756,12 +3560,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - new_context = qemu_get_aio_context(); - } - -- old_context = bdrv_get_aio_context(bs); -- aio_context_acquire(old_context); -- - bdrv_try_change_aio_context(bs, new_context, NULL, errp); -- -- aio_context_release(old_context); - } - - QemuOptsList qemu_common_drive_opts = { -diff --git a/blockjob.c b/blockjob.c -index 7310412313..d5f29e14af 100644 ---- a/blockjob.c -+++ 
b/blockjob.c -@@ -198,9 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - * one to make sure that such a concurrent access does not attempt - * to process an already freed BdrvChild. - */ -- aio_context_release(job->job.aio_context); - bdrv_graph_wrlock(); -- aio_context_acquire(job->job.aio_context); - while (job->nodes) { - GSList *l = job->nodes; - BdrvChild *c = l->data; -@@ -234,28 +232,12 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - uint64_t perm, uint64_t shared_perm, Error **errp) - { - BdrvChild *c; -- AioContext *ctx = bdrv_get_aio_context(bs); -- bool need_context_ops; - GLOBAL_STATE_CODE(); - - bdrv_ref(bs); - -- need_context_ops = ctx != job->job.aio_context; -- -- if (need_context_ops) { -- if (job->job.aio_context != qemu_get_aio_context()) { -- aio_context_release(job->job.aio_context); -- } -- aio_context_acquire(ctx); -- } - c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, - errp); -- if (need_context_ops) { -- aio_context_release(ctx); -- if (job->job.aio_context != qemu_get_aio_context()) { -- aio_context_acquire(job->job.aio_context); -- } -- } - if (c == NULL) { - return -EPERM; - } -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index f83bb0f116..7bbbd981ad 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -124,7 +124,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - VirtIOBlockDataPlane *s = vblk->dataplane; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- AioContext *old_context; - unsigned i; - unsigned nvqs = s->conf->num_queues; - Error *local_err = NULL; -@@ -178,10 +177,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - trace_virtio_blk_data_plane_start(s); - -- old_context = blk_get_aio_context(s->conf->conf.blk); -- aio_context_acquire(old_context); - r = blk_set_aio_context(s->conf->conf.blk, s->ctx, 
&local_err); -- aio_context_release(old_context); - if (r < 0) { - error_report_err(local_err); - goto fail_aio_context; -@@ -208,13 +204,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - /* Get this show started by hooking up our callbacks */ - if (!blk_in_drain(s->conf->conf.blk)) { -- aio_context_acquire(s->ctx); - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - - virtio_queue_aio_attach_host_notifier(vq, s->ctx); - } -- aio_context_release(s->ctx); - } - return 0; - -@@ -314,8 +308,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - */ - vblk->dataplane_started = false; - -- aio_context_acquire(s->ctx); -- - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf->conf.blk); - -@@ -325,8 +317,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - */ - blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); - -- aio_context_release(s->ctx); -- - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index c4bb28c66f..98501e6885 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -260,8 +260,6 @@ static void xen_block_complete_aio(void *opaque, int ret) - XenBlockRequest *request = opaque; - XenBlockDataPlane *dataplane = request->dataplane; - -- aio_context_acquire(dataplane->ctx); -- - if (ret != 0) { - error_report("%s I/O error", - request->req.operation == BLKIF_OP_READ ? 
-@@ -273,10 +271,10 @@ static void xen_block_complete_aio(void *opaque, int ret) - if (request->presync) { - request->presync = 0; - xen_block_do_aio(request); -- goto done; -+ return; - } - if (request->aio_inflight > 0) { -- goto done; -+ return; - } - - switch (request->req.operation) { -@@ -318,9 +316,6 @@ static void xen_block_complete_aio(void *opaque, int ret) - if (dataplane->more_work) { - qemu_bh_schedule(dataplane->bh); - } -- --done: -- aio_context_release(dataplane->ctx); - } - - static bool xen_block_split_discard(XenBlockRequest *request, -@@ -601,9 +596,7 @@ static void xen_block_dataplane_bh(void *opaque) - { - XenBlockDataPlane *dataplane = opaque; - -- aio_context_acquire(dataplane->ctx); - xen_block_handle_requests(dataplane); -- aio_context_release(dataplane->ctx); - } - - static bool xen_block_dataplane_event(void *opaque) -@@ -703,10 +696,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) - xen_block_dataplane_detach(dataplane); - } - -- aio_context_acquire(dataplane->ctx); - /* Xen doesn't have multiple users for nodes, so this can't fail */ - blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(dataplane->ctx); - - /* - * Now that the context has been moved onto the main thread, cancel -@@ -752,7 +743,6 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - { - ERRP_GUARD(); - XenDevice *xendev = dataplane->xendev; -- AioContext *old_context; - unsigned int ring_size; - unsigned int i; - -@@ -836,11 +826,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - goto stop; - } - -- old_context = blk_get_aio_context(dataplane->blk); -- aio_context_acquire(old_context); - /* If other users keep the BlockBackend in the iothread, that's ok */ - blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); -- aio_context_release(old_context); - - if (!blk_in_drain(dataplane->blk)) { - xen_block_dataplane_attach(dataplane); -diff --git a/hw/block/virtio-blk.c 
b/hw/block/virtio-blk.c -index e110f9718b..ec9ed09a6a 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1210,17 +1210,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, - static void virtio_blk_reset(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); -- AioContext *ctx; - VirtIOBlockReq *req; - - /* Dataplane has stopped... */ - assert(!s->dataplane_started); - - /* ...but requests may still be in flight. */ -- ctx = blk_get_aio_context(s->blk); -- aio_context_acquire(ctx); - blk_drain(s->blk); -- aio_context_release(ctx); - - /* We drop queued requests after blk_drain() because blk_drain() itself can - * produce them. */ -@@ -1250,10 +1246,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) - uint64_t capacity; - int64_t length; - int blk_size = conf->logical_block_size; -- AioContext *ctx; -- -- ctx = blk_get_aio_context(s->blk); -- aio_context_acquire(ctx); - - blk_get_geometry(s->blk, &capacity); - memset(&blkcfg, 0, sizeof(blkcfg)); -@@ -1277,7 +1269,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) - * per track (cylinder). - */ - length = blk_getlength(s->blk); -- aio_context_release(ctx); - if (length > 0 && length / conf->heads / conf->secs % blk_size) { - blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; - } else { -@@ -1344,9 +1335,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) - - memcpy(&blkcfg, config, s->config_size); - -- aio_context_acquire(blk_get_aio_context(s->blk)); - blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); -- aio_context_release(blk_get_aio_context(s->blk)); - } - - static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, -@@ -1414,11 +1403,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - * s->blk would erroneously be placed in writethrough mode. 
- */ - if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { -- aio_context_acquire(blk_get_aio_context(s->blk)); - blk_set_enable_write_cache(s->blk, - virtio_vdev_has_feature(vdev, - VIRTIO_BLK_F_WCE)); -- aio_context_release(blk_get_aio_context(s->blk)); - } - } - -diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index 1473ab3d5e..73cced4626 100644 ---- a/hw/core/qdev-properties-system.c -+++ b/hw/core/qdev-properties-system.c -@@ -120,9 +120,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, - "node"); - } - -- aio_context_acquire(ctx); - blk_replace_bs(blk, bs, errp); -- aio_context_release(ctx); - return; - } - -@@ -148,10 +146,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, - 0, BLK_PERM_ALL); - blk_created = true; - -- aio_context_acquire(ctx); - ret = blk_insert_bs(blk, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - goto fail; - } -@@ -207,12 +202,8 @@ static void release_drive(Object *obj, const char *name, void *opaque) - BlockBackend **ptr = object_field_prop_ptr(obj, prop); - - if (*ptr) { -- AioContext *ctx = blk_get_aio_context(*ptr); -- -- aio_context_acquire(ctx); - blockdev_auto_del(*ptr); - blk_detach_dev(*ptr, dev); -- aio_context_release(ctx); - } - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 6b21fbc73f..0327f1c605 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -31,11 +31,10 @@ - /* - * Global state (GS) API. These functions run under the BQL. - * -- * If a function modifies the graph, it also uses drain and/or -- * aio_context_acquire/release to be sure it has unique access. -- * aio_context locking is needed together with BQL because of -- * the thread-safe I/O API that concurrently runs and accesses -- * the graph without the BQL. 
-+ * If a function modifies the graph, it also uses the graph lock to be sure it -+ * has unique access. The graph lock is needed together with BQL because of the -+ * thread-safe I/O API that concurrently runs and accesses the graph without -+ * the BQL. - * - * It is important to note that not all of these functions are - * necessarily limited to running under the BQL, but they would -diff --git a/include/block/block-io.h b/include/block/block-io.h -index f8729ccc55..8eb39a858b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -31,8 +31,7 @@ - - /* - * I/O API functions. These functions are thread-safe, and therefore -- * can run in any thread as long as the thread has called -- * aio_context_acquire/release(). -+ * can run in any thread. - * - * These functions can only call functions from I/O and Common categories, - * but can be invoked by GS, "I/O or GS" and I/O APIs. -diff --git a/include/block/snapshot.h b/include/block/snapshot.h -index d49c5599d9..304cc6ea61 100644 ---- a/include/block/snapshot.h -+++ b/include/block/snapshot.h -@@ -86,8 +86,6 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, - - /* - * Group operations. All block drivers are involved. -- * These functions will properly handle dataplane (take aio_context_acquire -- * when appropriate for appropriate block drivers - */ - - bool bdrv_all_can_snapshot(bool has_devices, strList *devices, -diff --git a/job.c b/job.c -index 99a2e54b54..660ce22c56 100644 ---- a/job.c -+++ b/job.c -@@ -464,12 +464,8 @@ void job_unref_locked(Job *job) - assert(!job->txn); - - if (job->driver->free) { -- AioContext *aio_context = job->aio_context; - job_unlock(); -- /* FIXME: aiocontext lock is required because cb calls blk_unref */ -- aio_context_acquire(aio_context); - job->driver->free(job); -- aio_context_release(aio_context); - job_lock(); - } - -@@ -840,12 +836,10 @@ static void job_clean(Job *job) - - /* - * Called with job_mutex held, but releases it temporarily. 
-- * Takes AioContext lock internally to invoke a job->driver callback. - */ - static int job_finalize_single_locked(Job *job) - { - int job_ret; -- AioContext *ctx = job->aio_context; - - assert(job_is_completed_locked(job)); - -@@ -854,7 +848,6 @@ static int job_finalize_single_locked(Job *job) - - job_ret = job->ret; - job_unlock(); -- aio_context_acquire(ctx); - - if (!job_ret) { - job_commit(job); -@@ -867,7 +860,6 @@ static int job_finalize_single_locked(Job *job) - job->cb(job->opaque, job_ret); - } - -- aio_context_release(ctx); - job_lock(); - - /* Emit events only if we actually started */ -@@ -886,17 +878,13 @@ static int job_finalize_single_locked(Job *job) - - /* - * Called with job_mutex held, but releases it temporarily. -- * Takes AioContext lock internally to invoke a job->driver callback. - */ - static void job_cancel_async_locked(Job *job, bool force) - { -- AioContext *ctx = job->aio_context; - GLOBAL_STATE_CODE(); - if (job->driver->cancel) { - job_unlock(); -- aio_context_acquire(ctx); - force = job->driver->cancel(job, force); -- aio_context_release(ctx); - job_lock(); - } else { - /* No .cancel() means the job will behave as if force-cancelled */ -@@ -931,7 +919,6 @@ static void job_cancel_async_locked(Job *job, bool force) - - /* - * Called with job_mutex held, but releases it temporarily. -- * Takes AioContext lock internally to invoke a job->driver callback. 
- */ - static void job_completed_txn_abort_locked(Job *job) - { -@@ -979,15 +966,12 @@ static void job_completed_txn_abort_locked(Job *job) - static int job_prepare_locked(Job *job) - { - int ret; -- AioContext *ctx = job->aio_context; - - GLOBAL_STATE_CODE(); - - if (job->ret == 0 && job->driver->prepare) { - job_unlock(); -- aio_context_acquire(ctx); - ret = job->driver->prepare(job); -- aio_context_release(ctx); - job_lock(); - job->ret = ret; - job_update_rc_locked(job); -diff --git a/migration/block.c b/migration/block.c -index a15f9bddcb..6ec6a1d6e6 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -66,7 +66,7 @@ typedef struct BlkMigDevState { - /* Protected by block migration lock. */ - int64_t completed_sectors; - -- /* During migration this is protected by iothread lock / AioContext. -+ /* During migration this is protected by bdrv_dirty_bitmap_lock(). - * Allocation and free happen during setup and cleanup respectively. - */ - BdrvDirtyBitmap *dirty_bitmap; -@@ -101,7 +101,7 @@ typedef struct BlkMigState { - int prev_progress; - int bulk_completed; - -- /* Lock must be taken _inside_ the iothread lock and any AioContexts. */ -+ /* Lock must be taken _inside_ the iothread lock. 
*/ - QemuMutex lock; - } BlkMigState; - -@@ -270,7 +270,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - - if (bmds->shared_base) { - qemu_mutex_lock_iothread(); -- aio_context_acquire(blk_get_aio_context(bb)); - /* Skip unallocated sectors; intentionally treats failure or - * partial sector as an allocated sector */ - while (cur_sector < total_sectors && -@@ -281,7 +280,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - } - cur_sector += count >> BDRV_SECTOR_BITS; - } -- aio_context_release(blk_get_aio_context(bb)); - qemu_mutex_unlock_iothread(); - } - -@@ -313,21 +311,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - block_mig_state.submitted++; - blk_mig_unlock(); - -- /* We do not know if bs is under the main thread (and thus does -- * not acquire the AioContext when doing AIO) or rather under -- * dataplane. Thus acquire both the iothread mutex and the -- * AioContext. -- * -- * This is ugly and will disappear when we make bdrv_* thread-safe, -- * without the need to acquire the AioContext. -+ /* -+ * The migration thread does not have an AioContext. Lock the BQL so that -+ * I/O runs in the main loop AioContext (see -+ * qemu_get_current_aio_context()). - */ - qemu_mutex_lock_iothread(); -- aio_context_acquire(blk_get_aio_context(bmds->blk)); - bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, - nr_sectors * BDRV_SECTOR_SIZE); - blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, - 0, blk_mig_read_cb, blk); -- aio_context_release(blk_get_aio_context(bmds->blk)); - qemu_mutex_unlock_iothread(); - - bmds->cur_sector = cur_sector + nr_sectors; -@@ -512,7 +505,7 @@ static void blk_mig_reset_dirty_cursor(void) - } - } - --/* Called with iothread lock and AioContext taken. */ -+/* Called with iothread lock taken. 
*/ - - static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, - int is_async) -@@ -606,9 +599,7 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) - int ret = 1; - - QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { -- aio_context_acquire(blk_get_aio_context(bmds->blk)); - ret = mig_save_device_dirty(f, bmds, is_async); -- aio_context_release(blk_get_aio_context(bmds->blk)); - if (ret <= 0) { - break; - } -@@ -666,9 +657,9 @@ static int64_t get_remaining_dirty(void) - int64_t dirty = 0; - - QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { -- aio_context_acquire(blk_get_aio_context(bmds->blk)); -+ bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); - dirty += bdrv_get_dirty_count(bmds->dirty_bitmap); -- aio_context_release(blk_get_aio_context(bmds->blk)); -+ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); - } - - return dirty; -@@ -681,7 +672,6 @@ static void block_migration_cleanup_bmds(void) - { - BlkMigDevState *bmds; - BlockDriverState *bs; -- AioContext *ctx; - - unset_dirty_tracking(); - -@@ -693,13 +683,7 @@ static void block_migration_cleanup_bmds(void) - bdrv_op_unblock_all(bs, bmds->blocker); - } - error_free(bmds->blocker); -- -- /* Save ctx, because bmds->blk can disappear during blk_unref. 
*/ -- ctx = blk_get_aio_context(bmds->blk); -- aio_context_acquire(ctx); - blk_unref(bmds->blk); -- aio_context_release(ctx); -- - g_free(bmds->blk_name); - g_free(bmds->aio_bitmap); - g_free(bmds); -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 86ae832176..99710c8ffb 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -852,14 +852,11 @@ static void vm_completion(ReadLineState *rs, const char *str) - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - SnapshotInfoList *snapshots, *snapshot; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool ok = false; - -- aio_context_acquire(ctx); - if (bdrv_can_snapshot(bs)) { - ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0; - } -- aio_context_release(ctx); - if (!ok) { - continue; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index eec5503a42..1b9ab7b8ee 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -3049,7 +3049,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - int saved_vm_running; - uint64_t vm_state_size; - g_autoptr(GDateTime) now = g_date_time_new_now_local(); -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -3092,7 +3091,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - if (bs == NULL) { - return false; - } -- aio_context = bdrv_get_aio_context(bs); - - saved_vm_running = runstate_is_running(); - -@@ -3101,8 +3099,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - - bdrv_drain_all_begin(); - -- aio_context_acquire(aio_context); -- - memset(sn, 0, sizeof(*sn)); - - /* fill auxiliary fields */ -@@ -3139,14 +3135,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - goto the_end; - } - -- /* The bdrv_all_create_snapshot() call that follows acquires the AioContext -- * for itself. 
BDRV_POLL_WHILE() does not support nested locking because -- * it only releases the lock once. Therefore synchronous I/O will deadlock -- * unless we release the AioContext before bdrv_all_create_snapshot(). -- */ -- aio_context_release(aio_context); -- aio_context = NULL; -- - ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, - has_devices, devices, errp); - if (ret < 0) { -@@ -3157,10 +3145,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - ret = 0; - - the_end: -- if (aio_context) { -- aio_context_release(aio_context); -- } -- - bdrv_drain_all_end(); - - if (saved_vm_running) { -@@ -3258,7 +3242,6 @@ bool load_snapshot(const char *name, const char *vmstate, - QEMUSnapshotInfo sn; - QEMUFile *f; - int ret; -- AioContext *aio_context; - MigrationIncomingState *mis = migration_incoming_get_current(); - - if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { -@@ -3278,12 +3261,9 @@ bool load_snapshot(const char *name, const char *vmstate, - if (!bs_vm_state) { - return false; - } -- aio_context = bdrv_get_aio_context(bs_vm_state); - - /* Don't even try to load empty VM states */ -- aio_context_acquire(aio_context); - ret = bdrv_snapshot_find(bs_vm_state, &sn, name); -- aio_context_release(aio_context); - if (ret < 0) { - return false; - } else if (sn.vm_state_size == 0) { -@@ -3320,10 +3300,8 @@ bool load_snapshot(const char *name, const char *vmstate, - ret = -EINVAL; - goto err_drain; - } -- aio_context_acquire(aio_context); - ret = qemu_loadvm_state(f); - migration_incoming_state_destroy(); -- aio_context_release(aio_context); - - bdrv_drain_all_end(); - -diff --git a/net/colo-compare.c b/net/colo-compare.c -index 7f9e6f89ce..f2dfc0ebdc 100644 ---- a/net/colo-compare.c -+++ b/net/colo-compare.c -@@ -1439,12 +1439,10 @@ static void colo_compare_finalize(Object *obj) - qemu_bh_delete(s->event_bh); - - AioContext *ctx = iothread_get_aio_context(s->iothread); -- aio_context_acquire(ctx); - AIO_WAIT_WHILE(ctx, 
!s->out_sendco.done); - if (s->notify_dev) { - AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); - } -- aio_context_release(ctx); - - /* Release all unhandled packets after compare thead exited */ - g_queue_foreach(&s->conn_list, colo_flush_packets, s); -diff --git a/qemu-img.c b/qemu-img.c -index 5a77f67719..7668f86769 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -960,7 +960,6 @@ static int img_commit(int argc, char **argv) - Error *local_err = NULL; - CommonBlockJobCBInfo cbi; - bool image_opts = false; -- AioContext *aio_context; - int64_t rate_limit = 0; - - fmt = NULL; -@@ -1078,12 +1077,9 @@ static int img_commit(int argc, char **argv) - .bs = bs, - }; - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit, - BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, - &cbi, false, &local_err); -- aio_context_release(aio_context); - if (local_err) { - goto done; - } -diff --git a/qemu-io.c b/qemu-io.c -index 050c70835f..6cb1e00385 100644 ---- a/qemu-io.c -+++ b/qemu-io.c -@@ -414,15 +414,7 @@ static void prep_fetchline(void *opaque) - - static int do_qemuio_command(const char *cmd) - { -- int ret; -- AioContext *ctx = -- qemuio_blk ? 
blk_get_aio_context(qemuio_blk) : qemu_get_aio_context(); -- -- aio_context_acquire(ctx); -- ret = qemuio_command(qemuio_blk, cmd); -- aio_context_release(ctx); -- -- return ret; -+ return qemuio_command(qemuio_blk, cmd); - } - - static int command_loop(void) -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 186e6468b1..bac0b5e3ec 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -1123,9 +1123,7 @@ int main(int argc, char **argv) - qdict_put_str(raw_opts, "file", bs->node_name); - qdict_put_int(raw_opts, "offset", dev_offset); - -- aio_context_acquire(qemu_get_aio_context()); - bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal); -- aio_context_release(qemu_get_aio_context()); - - blk_remove_bs(blk); - blk_insert_bs(blk, bs, &error_fatal); -diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c -index 3e60549a4a..82c66fff26 100644 ---- a/replay/replay-debugging.c -+++ b/replay/replay-debugging.c -@@ -144,7 +144,6 @@ static char *replay_find_nearest_snapshot(int64_t icount, - char *ret = NULL; - int rv; - int nb_sns, i; -- AioContext *aio_context; - - *snapshot_icount = -1; - -@@ -152,11 +151,8 @@ static char *replay_find_nearest_snapshot(int64_t icount, - if (!bs) { - goto fail; - } -- aio_context = bdrv_get_aio_context(bs); - -- aio_context_acquire(aio_context); - nb_sns = bdrv_snapshot_list(bs, &sn_tab); -- aio_context_release(aio_context); - - for (i = 0; i < nb_sns; i++) { - rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL); -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index 38364fa557..c9c09fcacd 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -278,12 +278,9 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: - static void {name}_bh(void *opaque) - {{ - {struct_name} *s = opaque; -- AioContext *ctx = {func.gen_ctx('s->')}; - - {graph_lock} -- aio_context_acquire(ctx); - {func.get_result}{name}({ func.gen_list('s->{name}') }); -- aio_context_release(ctx); 
- {graph_unlock} - - aio_co_wake(s->co); -diff --git a/tests/tsan/suppressions.tsan b/tests/tsan/suppressions.tsan -index d9a002a2ef..b3ef59c27c 100644 ---- a/tests/tsan/suppressions.tsan -+++ b/tests/tsan/suppressions.tsan -@@ -4,7 +4,6 @@ - - # TSan reports a double lock on RECURSIVE mutexes. - # Since the recursive lock is intentional, we choose to ignore it. --mutex:aio_context_acquire - mutex:pthread_mutex_lock - - # TSan reports a race between pthread_mutex_init() and -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index d9754dfebc..17830a69c1 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -179,13 +179,7 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - - static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) - { -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - do_drain_begin(drain_type, bs); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(bdrv_get_aio_context(bs)); -- } - } - - static BlockBackend * no_coroutine_fn test_setup(void) -@@ -209,13 +203,7 @@ static BlockBackend * no_coroutine_fn test_setup(void) - - static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) - { -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - do_drain_end(drain_type, bs); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(bdrv_get_aio_context(bs)); -- } - } - - /* -@@ -520,12 +508,8 @@ static void test_iothread_main_thread_bh(void *opaque) - { - struct test_iothread_data *data = opaque; - -- /* Test that the AioContext is not yet locked in a random BH that is -- * executed during drain, otherwise this would deadlock. 
*/ -- aio_context_acquire(bdrv_get_aio_context(data->bs)); - bdrv_flush(data->bs); - bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() */ -- aio_context_release(bdrv_get_aio_context(data->bs)); - } - - /* -@@ -567,7 +551,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - blk_set_disable_request_queuing(blk, true); - - blk_set_aio_context(blk, ctx_a, &error_abort); -- aio_context_acquire(ctx_a); - - s->bh_indirection_ctx = ctx_b; - -@@ -582,8 +565,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - g_assert(acb != NULL); - g_assert_cmpint(aio_ret, ==, -EINPROGRESS); - -- aio_context_release(ctx_a); -- - data = (struct test_iothread_data) { - .bs = bs, - .drain_type = drain_type, -@@ -592,10 +573,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - - switch (drain_thread) { - case 0: -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(ctx_a); -- } -- - /* - * Increment in_flight so that do_drain_begin() waits for - * test_iothread_main_thread_bh(). 
This prevents the race between -@@ -613,20 +590,10 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - do_drain_begin(drain_type, bs); - g_assert_cmpint(bs->in_flight, ==, 0); - -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(ctx_a); -- } - qemu_event_wait(&done_event); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(ctx_a); -- } - - g_assert_cmpint(aio_ret, ==, 0); - do_drain_end(drain_type, bs); -- -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(ctx_a); -- } - break; - case 1: - co = qemu_coroutine_create(test_iothread_drain_co_entry, &data); -@@ -637,9 +604,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - g_assert_not_reached(); - } - -- aio_context_acquire(ctx_a); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx_a); - - bdrv_unref(bs); - blk_unref(blk); -@@ -757,7 +722,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - BlockJob *job; - TestBlockJob *tjob; - IOThread *iothread = NULL; -- AioContext *ctx; - int ret; - - src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, -@@ -787,11 +751,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - } - - if (use_iothread) { -+ AioContext *ctx; -+ - iothread = iothread_new(); - ctx = iothread_get_aio_context(iothread); - blk_set_aio_context(blk_src, ctx, &error_abort); -- } else { -- ctx = qemu_get_aio_context(); - } - - target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, -@@ -800,7 +764,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - blk_insert_bs(blk_target, target, &error_abort); - blk_set_allow_aio_context_change(blk_target, true); - -- aio_context_acquire(ctx); - tjob = block_job_create("job0", &test_job_driver, NULL, src, - 0, BLK_PERM_ALL, - 0, 0, NULL, NULL, &error_abort); -@@ -821,7 +784,6 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type, - tjob->prepare_ret = -EIO; - break; - } -- aio_context_release(ctx); - - job_start(&job->job); - -@@ -912,12 +874,10 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - } - g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); - -- aio_context_acquire(ctx); - if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); - assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); - } -- aio_context_release(ctx); - - blk_unref(blk_src); - blk_unref(blk_target); -@@ -1401,9 +1361,7 @@ static void test_append_to_drained(void) - g_assert_cmpint(base_s->drain_count, ==, 1); - g_assert_cmpint(base->in_flight, ==, 0); - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(overlay, base, &error_abort); -- aio_context_release(qemu_get_aio_context()); - - g_assert_cmpint(base->in_flight, ==, 0); - g_assert_cmpint(overlay->in_flight, ==, 0); -@@ -1438,16 +1396,11 @@ static void test_set_aio_context(void) - - bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); -- -- aio_context_acquire(ctx_a); - bdrv_drained_end(bs); - - bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); -- aio_context_release(ctx_a); -- aio_context_acquire(ctx_b); - bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); -- aio_context_release(ctx_b); - bdrv_drained_end(bs); - - bdrv_unref(bs); -diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c -index 8ee6ef38d8..cafc023db4 100644 ---- a/tests/unit/test-bdrv-graph-mod.c -+++ b/tests/unit/test-bdrv-graph-mod.c -@@ -142,10 +142,8 @@ static void test_update_perm_tree(void) - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); - -- aio_context_acquire(qemu_get_aio_context()); - ret = bdrv_append(filter, bs, NULL); - g_assert_cmpint(ret, <, 0); -- aio_context_release(qemu_get_aio_context()); - - bdrv_unref(filter); - blk_unref(root); -@@ -211,9 
+209,7 @@ static void test_should_update_child(void) - bdrv_attach_child(filter, target, "target", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(filter, bs, &error_abort); -- aio_context_release(qemu_get_aio_context()); - - bdrv_graph_rdlock_main_loop(); - g_assert(target->backing->bs == bs); -@@ -440,9 +436,7 @@ static void test_append_greedy_filter(void) - &error_abort); - bdrv_graph_wrunlock(); - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(fl, base, &error_abort); -- aio_context_release(qemu_get_aio_context()); - bdrv_unref(fl); - bdrv_unref(top); - } -diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c -index 9b15d2768c..3766d5de6b 100644 ---- a/tests/unit/test-block-iothread.c -+++ b/tests/unit/test-block-iothread.c -@@ -483,7 +483,6 @@ static void test_sync_op(const void *opaque) - bdrv_graph_rdunlock_main_loop(); - - blk_set_aio_context(blk, ctx, &error_abort); -- aio_context_acquire(ctx); - if (t->fn) { - t->fn(c); - } -@@ -491,7 +490,6 @@ static void test_sync_op(const void *opaque) - t->blkfn(blk); - } - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - bdrv_unref(bs); - blk_unref(blk); -@@ -576,9 +574,7 @@ static void test_attach_blockjob(void) - aio_poll(qemu_get_aio_context(), false); - } - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - tjob->n = 0; - while (tjob->n == 0) { -@@ -595,9 +591,7 @@ static void test_attach_blockjob(void) - WITH_JOB_LOCK_GUARD() { - job_complete_sync_locked(&tjob->common.job, &error_abort); - } -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - bdrv_unref(bs); - blk_unref(blk); -@@ -654,9 +648,7 @@ static void test_propagate_basic(void) - - /* Switch the AioContext back */ - main_ctx = 
qemu_get_aio_context(); -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs_a) == main_ctx); - g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); -@@ -732,9 +724,7 @@ static void test_propagate_diamond(void) - - /* Switch the AioContext back */ - main_ctx = qemu_get_aio_context(); -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); - g_assert(bdrv_get_aio_context(bs_a) == main_ctx); -@@ -764,13 +754,11 @@ static void test_propagate_mirror(void) - &error_abort); - - /* Start a mirror job */ -- aio_context_acquire(main_ctx); - mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, - MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, - &error_abort); -- aio_context_release(main_ctx); - - WITH_JOB_LOCK_GUARD() { - job = job_get_locked("job0"); -@@ -785,9 +773,7 @@ static void test_propagate_mirror(void) - g_assert(job->aio_context == ctx); - - /* Change the AioContext of target */ -- aio_context_acquire(ctx); - bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); -- aio_context_release(ctx); - g_assert(bdrv_get_aio_context(src) == main_ctx); - g_assert(bdrv_get_aio_context(target) == main_ctx); - g_assert(bdrv_get_aio_context(filter) == main_ctx); -@@ -805,10 +791,8 @@ static void test_propagate_mirror(void) - g_assert(bdrv_get_aio_context(filter) == main_ctx); - - /* ...unless we explicitly allow it */ -- aio_context_acquire(ctx); - blk_set_allow_aio_context_change(blk, true); - bdrv_try_change_aio_context(target, ctx, NULL, &error_abort); -- aio_context_release(ctx); - - g_assert(blk_get_aio_context(blk) == ctx); - 
g_assert(bdrv_get_aio_context(src) == ctx); -@@ -817,10 +801,8 @@ static void test_propagate_mirror(void) - - job_cancel_sync_all(); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); - bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); -- aio_context_release(ctx); - - blk_unref(blk); - bdrv_unref(src); -@@ -836,7 +818,6 @@ static void test_attach_second_node(void) - BlockDriverState *bs, *filter; - QDict *options; - -- aio_context_acquire(main_ctx); - blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); - blk_insert_bs(blk, bs, &error_abort); -@@ -846,15 +827,12 @@ static void test_attach_second_node(void) - qdict_put_str(options, "file", "base"); - - filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); -- aio_context_release(main_ctx); - - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); - g_assert(bdrv_get_aio_context(filter) == ctx); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs) == main_ctx); - g_assert(bdrv_get_aio_context(filter) == main_ctx); -@@ -868,11 +846,9 @@ static void test_attach_preserve_blk_ctx(void) - { - IOThread *iothread = iothread_new(); - AioContext *ctx = iothread_get_aio_context(iothread); -- AioContext *main_ctx = qemu_get_aio_context(); - BlockBackend *blk; - BlockDriverState *bs; - -- aio_context_acquire(main_ctx); - blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); - bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; -@@ -881,25 +857,18 @@ static void test_attach_preserve_blk_ctx(void) - blk_insert_bs(blk, bs, &error_abort); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); -- aio_context_release(main_ctx); - - 
/* Remove the node again */ -- aio_context_acquire(ctx); - blk_remove_bs(blk); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context()); - - /* Re-attach the node */ -- aio_context_acquire(main_ctx); - blk_insert_bs(blk, bs, &error_abort); -- aio_context_release(main_ctx); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - bdrv_unref(bs); - blk_unref(blk); - } -diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c -index a130f6fefb..fe3e0d2d38 100644 ---- a/tests/unit/test-blockjob.c -+++ b/tests/unit/test-blockjob.c -@@ -228,7 +228,6 @@ static void cancel_common(CancelJob *s) - BlockJob *job = &s->common; - BlockBackend *blk = s->blk; - JobStatus sts = job->job.status; -- AioContext *ctx = job->job.aio_context; - - job_cancel_sync(&job->job, true); - WITH_JOB_LOCK_GUARD() { -@@ -240,9 +239,7 @@ static void cancel_common(CancelJob *s) - job_unref_locked(&job->job); - } - -- aio_context_acquire(ctx); - destroy_blk(blk); -- aio_context_release(ctx); - - } - -@@ -391,132 +388,6 @@ static void test_cancel_concluded(void) - cancel_common(s); - } - --/* (See test_yielding_driver for the job description) */ --typedef struct YieldingJob { -- BlockJob common; -- bool should_complete; --} YieldingJob; -- --static void yielding_job_complete(Job *job, Error **errp) --{ -- YieldingJob *s = container_of(job, YieldingJob, common.job); -- s->should_complete = true; -- job_enter(job); --} -- --static int coroutine_fn yielding_job_run(Job *job, Error **errp) --{ -- YieldingJob *s = container_of(job, YieldingJob, common.job); -- -- job_transition_to_ready(job); -- -- while (!s->should_complete) { -- job_yield(job); -- } -- -- return 0; --} -- --/* -- * This job transitions immediately to the READY state, and then -- * yields 
until it is to complete. -- */ --static const BlockJobDriver test_yielding_driver = { -- .job_driver = { -- .instance_size = sizeof(YieldingJob), -- .free = block_job_free, -- .user_resume = block_job_user_resume, -- .run = yielding_job_run, -- .complete = yielding_job_complete, -- }, --}; -- --/* -- * Test that job_complete_locked() works even on jobs that are in a paused -- * state (i.e., STANDBY). -- * -- * To do this, run YieldingJob in an IO thread, get it into the READY -- * state, then have a drained section. Before ending the section, -- * acquire the context so the job will not be entered and will thus -- * remain on STANDBY. -- * -- * job_complete_locked() should still work without error. -- * -- * Note that on the QMP interface, it is impossible to lock an IO -- * thread before a drained section ends. In practice, the -- * bdrv_drain_all_end() and the aio_context_acquire() will be -- * reversed. However, that makes for worse reproducibility here: -- * Sometimes, the job would no longer be in STANDBY then but already -- * be started. We cannot prevent that, because the IO thread runs -- * concurrently. We can only prevent it by taking the lock before -- * ending the drained section, so we do that. -- * -- * (You can reverse the order of operations and most of the time the -- * test will pass, but sometimes the assert(status == STANDBY) will -- * fail.) 
-- */ --static void test_complete_in_standby(void) --{ -- BlockBackend *blk; -- IOThread *iothread; -- AioContext *ctx; -- Job *job; -- BlockJob *bjob; -- -- /* Create a test drive, move it to an IO thread */ -- blk = create_blk(NULL); -- iothread = iothread_new(); -- -- ctx = iothread_get_aio_context(iothread); -- blk_set_aio_context(blk, ctx, &error_abort); -- -- /* Create our test job */ -- bjob = mk_job(blk, "job", &test_yielding_driver, true, -- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); -- job = &bjob->job; -- assert_job_status_is(job, JOB_STATUS_CREATED); -- -- /* Wait for the job to become READY */ -- job_start(job); -- /* -- * Here we are waiting for the status to change, so don't bother -- * protecting the read every time. -- */ -- AIO_WAIT_WHILE_UNLOCKED(ctx, job->status != JOB_STATUS_READY); -- -- /* Begin the drained section, pausing the job */ -- bdrv_drain_all_begin(); -- assert_job_status_is(job, JOB_STATUS_STANDBY); -- -- /* Lock the IO thread to prevent the job from being run */ -- aio_context_acquire(ctx); -- /* This will schedule the job to resume it */ -- bdrv_drain_all_end(); -- aio_context_release(ctx); -- -- WITH_JOB_LOCK_GUARD() { -- /* But the job cannot run, so it will remain on standby */ -- assert(job->status == JOB_STATUS_STANDBY); -- -- /* Even though the job is on standby, this should work */ -- job_complete_locked(job, &error_abort); -- -- /* The test is done now, clean up. 
*/ -- job_finish_sync_locked(job, NULL, &error_abort); -- assert(job->status == JOB_STATUS_PENDING); -- -- job_finalize_locked(job, &error_abort); -- assert(job->status == JOB_STATUS_CONCLUDED); -- -- job_dismiss_locked(&job, &error_abort); -- } -- -- aio_context_acquire(ctx); -- destroy_blk(blk); -- aio_context_release(ctx); -- iothread_join(iothread); --} -- - int main(int argc, char **argv) - { - qemu_init_main_loop(&error_abort); -@@ -531,13 +402,5 @@ int main(int argc, char **argv) - g_test_add_func("/blockjob/cancel/standby", test_cancel_standby); - g_test_add_func("/blockjob/cancel/pending", test_cancel_pending); - g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded); -- -- /* -- * This test is flaky and sometimes fails in CI and otherwise: -- * don't run unless user opts in via environment variable. -- */ -- if (getenv("QEMU_TEST_FLAKY_TESTS")) { -- g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby); -- } - return g_test_run(); - } -diff --git a/tests/unit/test-replication.c b/tests/unit/test-replication.c -index afff908d77..5d2003b8ce 100644 ---- a/tests/unit/test-replication.c -+++ b/tests/unit/test-replication.c -@@ -199,17 +199,13 @@ static BlockBackend *start_primary(void) - static void teardown_primary(void) - { - BlockBackend *blk; -- AioContext *ctx; - - /* remove P_ID */ - blk = blk_by_name(P_ID); - assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - } - - static void test_primary_read(void) -@@ -345,27 +341,20 @@ static void teardown_secondary(void) - { - /* only need to destroy two BBs */ - BlockBackend *blk; -- AioContext *ctx; - - /* remove S_LOCAL_DISK_ID */ - blk = blk_by_name(S_LOCAL_DISK_ID); - assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - - /* remove S_ID */ - blk = blk_by_name(S_ID); - 
assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - } - - static void test_secondary_read(void) -diff --git a/util/async.c b/util/async.c -index 04ee83d220..dfd44ef612 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -562,12 +562,10 @@ static void co_schedule_bh_cb(void *opaque) - Coroutine *co = QSLIST_FIRST(&straight); - QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); - trace_aio_co_schedule_bh_cb(ctx, co); -- aio_context_acquire(ctx); - - /* Protected by write barrier in qemu_aio_coroutine_enter */ - qatomic_set(&co->scheduled, NULL); - qemu_aio_coroutine_enter(ctx, co); -- aio_context_release(ctx); - } - } - -@@ -707,9 +705,7 @@ void aio_co_enter(AioContext *ctx, Coroutine *co) - assert(self != co); - QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); - } else { -- aio_context_acquire(ctx); - qemu_aio_coroutine_enter(ctx, co); -- aio_context_release(ctx); - } - } - -diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c -index a9a48fffb8..3bfb1ad3ec 100644 ---- a/util/vhost-user-server.c -+++ b/util/vhost-user-server.c -@@ -360,10 +360,7 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc, - - qio_channel_set_follow_coroutine_ctx(server->ioc, true); - -- /* Attaching the AioContext starts the vu_client_trip coroutine */ -- aio_context_acquire(server->ctx); - vhost_user_server_attach_aio_context(server, server->ctx); -- aio_context_release(server->ctx); - } - - /* server->ctx acquired by caller */ --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-bdrv_co_lock.patch b/SOURCES/kvm-block-remove-bdrv_co_lock.patch deleted file mode 100644 index b219c1c..0000000 --- a/SOURCES/kvm-block-remove-bdrv_co_lock.patch +++ /dev/null @@ -1,97 +0,0 @@ -From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:04 -0500 -Subject: [PATCH 087/101] block: remove 
bdrv_co_lock() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm) - -The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. -Remove them. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231205182011.1976568-8-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - block.c | 10 ---------- - blockdev.c | 5 ----- - include/block/block-global-state.h | 14 -------------- - 3 files changed, 29 deletions(-) - -diff --git a/block.c b/block.c -index 91ace5d2d5..434b7f4d72 100644 ---- a/block.c -+++ b/block.c -@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) - bdrv_dec_in_flight(bs); - } - --void coroutine_fn bdrv_co_lock(BlockDriverState *bs) --{ -- /* TODO removed in next patch */ --} -- --void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) --{ -- /* TODO removed in next patch */ --} -- - static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) - { - GLOBAL_STATE_CODE(); -diff --git a/blockdev.c b/blockdev.c -index 5d8b3a23eb..3a5e7222ec 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - return; - } - -- bdrv_co_lock(bs); - bdrv_drained_begin(bs); -- bdrv_co_unlock(bs); - - old_ctx = bdrv_co_enter(bs); - blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); - bdrv_co_leave(bs, old_ctx); - -- bdrv_co_lock(bs); - bdrv_drained_end(bs); -- bdrv_co_unlock(bs); -- - blk_co_unref(blk); - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 0327f1c605..4ec0b217f0 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char 
*tag); - int bdrv_debug_resume(BlockDriverState *bs, const char *tag); - bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); - --/** -- * Locks the AioContext of @bs if it's not the current AioContext. This avoids -- * double locking which could lead to deadlocks: This is a coroutine_fn, so we -- * know we already own the lock of the current AioContext. -- * -- * May only be called in the main thread. -- */ --void coroutine_fn bdrv_co_lock(BlockDriverState *bs); -- --/** -- * Unlocks the AioContext of @bs if it's not the current AioContext. -- */ --void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); -- - bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch deleted file mode 100644 index d6670c1..0000000 --- a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch +++ /dev/null @@ -1,411 +0,0 @@ -From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:11 -0500 -Subject: [PATCH 094/101] block: remove outdated AioContext locking comments - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer exists. - -There is one noteworthy change: - - - * More specifically, these functions use BDRV_POLL_WHILE(bs), which - - * requires the caller to be either in the main thread and hold - - * the BlockdriverState (bs) AioContext lock, or directly in the - - * home thread that runs the bs AioContext. Calling them from - - * another thread in another AioContext would cause deadlocks. 
- + * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires - + * the caller to be either in the main thread or directly in the home thread - + * that runs the bs AioContext. Calling them from another thread in another - + * AioContext would cause deadlocks. - -I am not sure whether deadlocks are still possible. Maybe they have just -moved to the fine-grained locks that have replaced the AioContext. Since -I am not sure if the deadlocks are gone, I have kept the substance -unchanged and just removed mention of the AioContext. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-15-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - block.c | 73 ++++++---------------------- - block/block-backend.c | 8 --- - block/export/vhost-user-blk-server.c | 4 -- - include/block/block-common.h | 3 -- - include/block/block-io.h | 9 ++-- - include/block/block_int-common.h | 2 - - tests/qemu-iotests/202 | 2 +- - tests/qemu-iotests/203 | 3 +- - 8 files changed, 22 insertions(+), 82 deletions(-) - -diff --git a/block.c b/block.c -index 434b7f4d72..a097772238 100644 ---- a/block.c -+++ b/block.c -@@ -1616,11 +1616,6 @@ out: - g_free(gen_node_name); - } - --/* -- * The caller must always hold @bs AioContext lock, because this function calls -- * bdrv_refresh_total_sectors() which polls when called from non-coroutine -- * context. -- */ - static int no_coroutine_fn GRAPH_UNLOCKED - bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - QDict *options, int open_flags, Error **errp) -@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) - * Replaces the node that a BdrvChild points to without updating permissions. - * - * If @new_bs is non-NULL, the parent of @child must already be drained through -- * @child and the caller must hold the AioContext lock for @new_bs. -+ * @child. 
- */ - static void GRAPH_WRLOCK - bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) -@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { - * - * Returns new created child. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - */ - static BdrvChild * GRAPH_WRLOCK - bdrv_attach_child_common(BlockDriverState *child_bs, -@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - /* - * Function doesn't update permissions, caller is responsible for this. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -- * -- * The caller must hold the AioContext lock @child_bs, but not that of @ctx -- * (unless @child_bs is already in @ctx). - */ - BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - const char *child_name, -@@ -3226,9 +3216,6 @@ out: - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -- * -- * If @parent_bs and @child_bs are in different AioContexts, the caller must -- * hold the AioContext lock for @child_bs, but not for @parent_bs. 
- */ - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) - * - * Function doesn't update permissions, caller is responsible for this. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -@@ -3513,9 +3499,8 @@ out: - } - - /* -- * The caller must hold the AioContext lock for @backing_hd. Both @bs and -- * @backing_hd can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @bs and @backing_hd can move to a different AioContext in this -+ * function. - * - * If a backing child is already present (i.e. we're detaching a node), that - * child node must be drained. -@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - * itself, all options starting with "${bdref_key}." are considered part of the - * BlockdevRef. - * -- * The caller must hold the main AioContext lock. -- * - * TODO Can this be unified with bdrv_open_image()? - */ - int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, -@@ -3745,9 +3728,7 @@ done: - * - * The BlockdevRef will be removed from the options QDict. - * -- * The caller must hold the lock of the main AioContext and no other AioContext. -- * @parent can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * @parent can move to a different AioContext in this function. 
- */ - BdrvChild *bdrv_open_child(const char *filename, - QDict *options, const char *bdref_key, -@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename, - /* - * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. - * -- * The caller must hold the lock of the main AioContext and no other AioContext. -- * @parent can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * @parent can move to a different AioContext in this function. - */ - int bdrv_open_file_child(const char *filename, - QDict *options, const char *bdref_key, -@@ -3923,8 +3902,6 @@ out: - * The reference parameter may be used to specify an existing block device which - * should be opened. If specified, neither options nor a filename may be given, - * nor can an existing BDS be reused (that is, *pbs has to be NULL). -- * -- * The caller must always hold the main AioContext lock. - */ - static BlockDriverState * no_coroutine_fn - bdrv_open_inherit(const char *filename, const char *reference, QDict *options, -@@ -4217,7 +4194,6 @@ close_and_fail: - return NULL; - } - --/* The caller must always hold the main AioContext lock. */ - BlockDriverState *bdrv_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp) - { -@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - * - * Return 0 on success, otherwise return < 0 and set @errp. - * -- * The caller must hold the AioContext lock of @reopen_state->bs. - * @reopen_state->bs can move to a different AioContext in this function. -- * Callers must make sure that their AioContext locking is still correct after -- * this. 
- */ - static int GRAPH_UNLOCKED - bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, -@@ -4801,8 +4774,6 @@ out_rdlock: - * It is the responsibility of the caller to then call the abort() or - * commit() for any other BDS that have been left in a prepare() state - * -- * The caller must hold the AioContext lock of @reopen_state->bs. -- * - * After calling this function, the transaction @change_child_tran may only be - * completed while holding a writer lock for the graph. - */ -@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - * child. - * - * This function does not create any image files. -- * -- * The caller must hold the AioContext lock for @bs_top. - */ - int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp) -@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs) - * after the call (even on failure), so if the caller intends to reuse the - * dictionary, it needs to use qobject_ref() before calling bdrv_open. - * -- * The caller holds the AioContext lock for @bs. It must make sure that @bs -- * stays in the same AioContext, i.e. @options must not refer to nodes in a -- * different AioContext. -+ * The caller must make sure that @bs stays in the same AioContext, i.e. -+ * @options must not refer to nodes in a different AioContext. - */ - BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - int flags, Error **errp) -@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = { - * - * Must be called from the main AioContext. - * -- * The caller must own the AioContext lock for the old AioContext of bs, but it -- * must not own the AioContext lock for new_context (unless new_context is the -- * same as the current context of bs). -- * - * @visited will accumulate all visited BdrvChild objects. The caller is - * responsible for freeing the list afterwards. 
- */ -@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - * - * If ignore_child is not NULL, that child (and its subgraph) will not - * be touched. -- * -- * This function still requires the caller to take the bs current -- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE -- * assumes the lock is always held if bs is in another AioContext. -- * For the same reason, it temporarily also holds the new AioContext, since -- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too. -- * Therefore the new AioContext lock must not be taken by the caller. - */ - int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp) -@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - - /* - * Linear phase: go through all callbacks collected in the transaction. -- * Run all callbacks collected in the recursion to switch all nodes -- * AioContext lock (transaction commit), or undo all changes done in the -+ * Run all callbacks collected in the recursion to switch every node's -+ * AioContext (transaction commit), or undo all changes done in the - * recursion (transaction abort). - */ - -diff --git a/block/block-backend.c b/block/block-backend.c -index f412bed274..209eb07528 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) - * Both sets of permissions can be changed later using blk_set_perm(). - * - * Return the new BlockBackend on success, null on failure. -- * -- * Callers must hold the AioContext lock of @bs. 
- */ - BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, Error **errp) -@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - * Just as with bdrv_open(), after having called this function the reference to - * @options belongs to the block layer (even on failure). - * -- * Called without holding an AioContext lock. -- * - * TODO: Remove @filename and @flags; it should be possible to specify a whole - * BDS tree just by specifying the @options QDict (or @reference, - * alternatively). At the time of adding this function, this is not possible, -@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) - - /* - * Disassociates the currently associated BlockDriverState from @blk. -- * -- * The caller must hold the AioContext lock for the BlockBackend. - */ - void blk_remove_bs(BlockBackend *blk) - { -@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk) - - /* - * Associates a new BlockDriverState with @blk. -- * -- * Callers must hold the AioContext lock of @bs. 
- */ - int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - { -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index 16f48388d3..50c358e8cd 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque) - vu_config_change_msg(&vexp->vu_server.vu_dev); - } - --/* Called with vexp->export.ctx acquired */ - static void vu_blk_drained_begin(void *opaque) - { - VuBlkExport *vexp = opaque; -@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque) - vhost_user_server_detach_aio_context(&vexp->vu_server); - } - --/* Called with vexp->export.blk AioContext acquired */ - static void vu_blk_drained_end(void *opaque) - { - VuBlkExport *vexp = opaque; -@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque) - * Ensures that bdrv_drained_begin() waits until in-flight requests complete - * and the server->co_trip coroutine has terminated. It will be restarted in - * vhost_user_server_attach_aio_context(). -- * -- * Called with vexp->export.ctx acquired. - */ - static bool vu_blk_drained_poll(void *opaque) - { -diff --git a/include/block/block-common.h b/include/block/block-common.h -index d7599564db..a846023a09 100644 ---- a/include/block/block-common.h -+++ b/include/block/block-common.h -@@ -70,9 +70,6 @@ - * automatically takes the graph rdlock when calling the wrapped function. In - * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the - * graph wrlock. -- * -- * If the first parameter of the function is a BlockDriverState, BdrvChild or -- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. 
- */ - #define no_co_wrapper - #define no_co_wrapper_bdrv_rdlock -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 8eb39a858b..b49e0537dd 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. - * -- * More specifically, these functions use BDRV_POLL_WHILE(bs), which -- * requires the caller to be either in the main thread and hold -- * the BlockdriverState (bs) AioContext lock, or directly in the -- * home thread that runs the bs AioContext. Calling them from -- * another thread in another AioContext would cause deadlocks. -+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires -+ * the caller to be either in the main thread or directly in the home thread -+ * that runs the bs AioContext. Calling them from another thread in another -+ * AioContext would cause deadlocks. - * - * Therefore, these functions are not proper I/O, because they - * can't run in *any* iothreads, but only in a specific one. -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 4e31d161c5..151279d481 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -1192,8 +1192,6 @@ struct BlockDriverState { - /* The error object in use for blocking operations on backing_hd */ - Error *backing_blocker; - -- /* Protected by AioContext lock */ -- - /* - * If we are reading a disk image, give its size in sectors. - * Generally read-only; it is written to by load_snapshot and -diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202 -index b784dcd791..13304242e5 100755 ---- a/tests/qemu-iotests/202 -+++ b/tests/qemu-iotests/202 -@@ -21,7 +21,7 @@ - # Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a - # single IOThread completes successfully. 
This particular command triggered a - # hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect --# against regressions. -+# against regressions even though the AioContext lock no longer exists. - - import iotests - -diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203 -index ab80fd0e44..1ba878522b 100755 ---- a/tests/qemu-iotests/203 -+++ b/tests/qemu-iotests/203 -@@ -21,7 +21,8 @@ - # Check that QMP 'migrate' with multiple drives on a single IOThread completes - # successfully. This particular command triggered a hang in the source QEMU - # process due to recursive AioContext locking in bdrv_invalidate_all() and --# BDRV_POLL_WHILE(). -+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext -+# lock no longer exists. - - import iotests - --- -2.39.3 - diff --git a/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch b/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch deleted file mode 100644 index 34c4e8f..0000000 --- a/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 95b2ffc5f01dc4309c2e747ed883d22cd1d26347 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Sat, 2 Mar 2024 17:00:23 +0100 -Subject: [PATCH 2/2] chardev/char-socket: Fix TLS io channels sending too much - data to the backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 227: Fix TLS io channels sending too much data to the backend -RH-Jira: RHEL-24614 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Daniel P. 
Berrangé -RH-Commit: [1/1] fce871914e0ce52e16a6edae0e007513f9fec1ae (thuth/qemu-kvm-cs9) - -JIRA: https://issues.redhat.com/browse/RHEL-24614 - -commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 -Author: Thomas Huth -Date: Thu Feb 29 11:43:37 2024 +0100 - - chardev/char-socket: Fix TLS io channels sending too much data to the backend - - Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") - changed the behavior of the TLS io channels to schedule a second reading - attempt if there is still incoming data pending. This caused a regression - with backends like the sclpconsole that check in their read function that - the sender does not try to write more bytes to it than the device can - currently handle. - - The problem can be reproduced like this: - - 1) In one terminal, do this: - - mkdir qemu-pki - cd qemu-pki - openssl genrsa 2048 > ca-key.pem - openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem - # enter some dummy value for the cert - openssl genrsa 2048 > server-key.pem - openssl req -new -x509 -nodes -days 365000 -key server-key.pem \ - -out server-cert.pem - # enter some other dummy values for the cert - - gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \ - --x509certfile server-cert.pem -p 8338 - - 2) In another terminal, do this: - - wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2 - - qemu-system-s390x -nographic -nodefaults \ - -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \ - -object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \ - -chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \ - -device sclpconsole,chardev=tls_chardev,id=tls_serial - - QEMU then aborts after a second or two with: - - qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion - `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. 
- Aborted (core dumped) - - It looks like the second read does not trigger the chr_can_read() function - to be called before the second read, which should normally always be done - before sending bytes to a character device to see how much it can handle, - so the s->max_size in tcp_chr_read() still contains the old value from the - previous read. Let's make sure that we use the up-to-date value by calling - tcp_chr_read_poll() again here. - - Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") - Buglink: https://issues.redhat.com/browse/RHEL-24614 - Reviewed-by: "Daniel P. Berrangé" - Message-ID: <20240229104339.42574-1-thuth@redhat.com> - Reviewed-by: Antoine Damhet - Tested-by: Antoine Damhet - Reviewed-by: Marc-André Lureau - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - chardev/char-socket.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 73947da188..034840593d 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) - s->max_size <= 0) { - return TRUE; - } -- len = sizeof(buf); -- if (len > s->max_size) { -- len = s->max_size; -+ len = tcp_chr_read_poll(opaque); -+ if (len > sizeof(buf)) { -+ len = sizeof(buf); - } - size = tcp_chr_recv(chr, (void *)buf, len); - if (size == 0 || (size == -1 && errno != EAGAIN)) { --- -2.39.3 - diff --git a/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch b/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch deleted file mode 100644 index 30e055f..0000000 --- a/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 18 Mar 2024 18:06:59 +0000 -Subject: [PATCH 1/3] chardev: lower 
priority of the HUP GSource in socket - chardev -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs -RH-Jira: RHEL-24614 -RH-Acked-by: Thomas Huth -RH-Acked-by: Marc-André Lureau -RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu) - -The socket chardev often has 2 GSource object registered against the -same FD. One is registered all the time and is just intended to handle -POLLHUP events, while the other gets registered & unregistered on the -fly as the frontend is ready to receive more data or not. - -It is very common for poll() to signal a POLLHUP event at the same time -as there is pending incoming data from the disconnected client. It is -therefore essential to process incoming data prior to processing HUP. -The problem with having 2 GSource on the same FD is that there is no -guaranteed ordering of execution between them, so the chardev code may -process HUP first and thus discard data. - -This failure scenario is non-deterministic but can be seen fairly -reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and -then running 'tests/unit/test-char', which will sometimes fail with -missing data. - -Ideally QEMU would only have 1 GSource, but that's a complex code -refactoring job. The next best solution is to try to ensure ordering -between the 2 GSource objects. This can be achieved by lowering the -priority of the HUP GSource, so that it is never dispatched if the -main GSource is also ready to dispatch. Counter-intuitively, lowering -the priority of a GSource is done by raising its priority number. - -Reviewed-by: Marc-André Lureau -Reviewed-by: Thomas Huth -Signed-off-by: Daniel P. 
Berrangé -(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6) ---- - chardev/char-socket.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 034840593d..f48d341ebc 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s) - - remove_hup_source(s); - s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP); -+ /* -+ * poll() is liable to return POLLHUP even when there is -+ * still incoming data available to read on the FD. If -+ * we have the hup_source at the same priority as the -+ * main io_add_watch_poll GSource, then we might end up -+ * processing the POLLHUP event first, closing the FD, -+ * and as a result silently discard data we should have -+ * read. -+ * -+ * By setting the hup_source to G_PRIORITY_DEFAULT + 1, -+ * we ensure that io_add_watch_poll GSource will always -+ * be dispatched first, thus guaranteeing we will be -+ * able to process all incoming data before closing the -+ * FD -+ */ -+ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1); - g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, - chr, NULL); - g_source_attach(s->hup_source, chr->gcontext); --- -2.39.3 - diff --git a/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch new file mode 100644 index 0000000..785b437 --- /dev/null +++ b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch @@ -0,0 +1,90 @@ +From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:35 -0500 +Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and + kvm_reset() in class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly 
Kuznetsov +RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm) + +Different confidential VMs in different architectures all have the same +needs to do their specific initialization (and maybe resetting) stuffs +with KVM. Currently each of them exposes individual *_kvm_init() +functions and let machine code or kvm code to call it. + +To facilitate the introduction of confidential guest technology from +different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() +in ConfidentialGuestSupportClass, and expose two helper functions for +invoking them. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe) +Signed-off-by: Paolo Bonzini +--- + include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index ba2dd4b5df..e5b188cffb 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -23,7 +23,10 @@ + #include "qom/object.h" + + #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" +-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) ++OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, ++ ConfidentialGuestSupportClass, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ + + struct ConfidentialGuestSupport { + Object parent; +@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { + + typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; ++ ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); ++ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); + } ConfidentialGuestSupportClass; + ++static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; 
++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_init) { ++ return klass->kvm_init(cgs, errp); ++ } ++ ++ return 0; ++} ++ ++static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_reset) { ++ return klass->kvm_reset(cgs, errp); ++ } ++ ++ return 0; ++} ++ + #endif /* !CONFIG_USER_ONLY */ + + #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch b/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch deleted file mode 100644 index 6fffc23..0000000 --- a/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch +++ /dev/null @@ -1,412 +0,0 @@ -From e99c56752a1c4021a93c92b7be78856ebefaa1b3 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 18 Mar 2024 14:34:29 -0400 -Subject: [PATCH 1/2] coroutine: cap per-thread local pool size - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 234: coroutine: cap per-thread local pool size -RH-Jira: RHEL-28947 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/2] 5971de1c1e238457925bfb9c4bfc932de857b28d (stefanha/centos-stream-qemu-kvm) - -The coroutine pool implementation can hit the Linux vm.max_map_count -limit, causing QEMU to abort with "failed to allocate memory for stack" -or "failed to set up stack guard page" during coroutine creation. - -This happens because per-thread pools can grow to tens of thousands of -coroutines. Each coroutine causes 2 virtual memory areas to be created. -Eventually vm.max_map_count is reached and memory-related syscalls fail. -The per-thread pool sizes are non-uniform and depend on past coroutine -usage in each thread, so it's possible for one thread to have a large -pool while another thread's pool is empty. 
- -Switch to a new coroutine pool implementation with a global pool that -grows to a maximum number of coroutines and per-thread local pools that -are capped at hardcoded small number of coroutines. - -This approach does not leave large numbers of coroutines pooled in a -thread that may not use them again. In order to perform well it -amortizes the cost of global pool accesses by working in batches of -coroutines instead of individual coroutines. - -The global pool is a list. Threads donate batches of coroutines to when -they have too many and take batches from when they have too few: - -.-----------------------------------. -| Batch 1 | Batch 2 | Batch 3 | ... | global_pool -`-----------------------------------' - -Each thread has up to 2 batches of coroutines: - -.-------------------. -| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) -`-------------------' - -The goal of this change is to reduce the excessive number of pooled -coroutines that cause QEMU to abort when vm.max_map_count is reached -without losing the performance of an adequately sized coroutine pool. 
- -Here are virtio-blk disk I/O benchmark results: - - RW BLKSIZE IODEPTH OLD NEW CHANGE -randread 4k 1 113725 117451 +3.3% -randread 4k 8 192968 198510 +2.9% -randread 4k 16 207138 209429 +1.1% -randread 4k 32 212399 215145 +1.3% -randread 4k 64 218319 221277 +1.4% -randread 128k 1 17587 17535 -0.3% -randread 128k 8 17614 17616 +0.0% -randread 128k 16 17608 17609 +0.0% -randread 128k 32 17552 17553 +0.0% -randread 128k 64 17484 17484 +0.0% - -See files/{fio.sh,test.xml.j2} for the benchmark configuration: -https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing - -Buglink: https://issues.redhat.com/browse/RHEL-28947 -Reported-by: Sanjay Rao -Reported-by: Boaz Ben Shabat -Reported-by: Joe Mario -Reviewed-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240318183429.1039340-1-stefanha@redhat.com> -(cherry picked from commit 86a637e48104ae74d8be53bed6441ce32be33433) -Signed-off-by: Stefan Hajnoczi ---- - util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++--------- - 1 file changed, 223 insertions(+), 59 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index 5fd2dbaf8b..2790959eaf 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -18,39 +18,200 @@ - #include "qemu/atomic.h" - #include "qemu/coroutine_int.h" - #include "qemu/coroutine-tls.h" -+#include "qemu/cutils.h" - #include "block/aio.h" - --/** -- * The minimal batch size is always 64, coroutines from the release_pool are -- * reused as soon as there are 64 coroutines in it. The maximum pool size starts -- * with 64 and is increased on demand so that coroutines are not deleted even if -- * they are not immediately reused. 
-- */ - enum { -- POOL_MIN_BATCH_SIZE = 64, -- POOL_INITIAL_MAX_SIZE = 64, -+ COROUTINE_POOL_BATCH_MAX_SIZE = 128, - }; - --/** Free list to speed up creation */ --static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); --static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; --static unsigned int release_pool_size; -+/* -+ * Coroutine creation and deletion is expensive so a pool of unused coroutines -+ * is kept as a cache. When the pool has coroutines available, they are -+ * recycled instead of creating new ones from scratch. Coroutines are added to -+ * the pool upon termination. -+ * -+ * The pool is global but each thread maintains a small local pool to avoid -+ * global pool contention. Threads fetch and return batches of coroutines from -+ * the global pool to maintain their local pool. The local pool holds up to two -+ * batches whereas the maximum size of the global pool is controlled by the -+ * qemu_coroutine_inc_pool_size() API. -+ * -+ * .-----------------------------------. -+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool -+ * `-----------------------------------' -+ * -+ * .-------------------. 
-+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) -+ * `-------------------' -+ */ -+typedef struct CoroutinePoolBatch { -+ /* Batches are kept in a list */ -+ QSLIST_ENTRY(CoroutinePoolBatch) next; -+ -+ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */ -+ QSLIST_HEAD(, Coroutine) list; -+ unsigned int size; -+} CoroutinePoolBatch; -+ -+typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool; -+ -+/* Host operating system limit on number of pooled coroutines */ -+static unsigned int global_pool_hard_max_size; -+ -+static QemuMutex global_pool_lock; /* protects the following variables */ -+static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool); -+static unsigned int global_pool_size; -+static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE; -+ -+QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool); -+QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier); - --typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; --QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); --QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); --QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); -+static CoroutinePoolBatch *coroutine_pool_batch_new(void) -+{ -+ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1); -+ -+ QSLIST_INIT(&batch->list); -+ batch->size = 0; -+ return batch; -+} - --static void coroutine_pool_cleanup(Notifier *n, void *value) -+static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch) - { - Coroutine *co; - Coroutine *tmp; -- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); - -- QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { -- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); -+ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) { -+ QSLIST_REMOVE_HEAD(&batch->list, pool_next); - qemu_coroutine_delete(co); - } -+ g_free(batch); -+} -+ -+static void local_pool_cleanup(Notifier *n, void *value) -+{ -+ CoroutinePool *local_pool = 
get_ptr_local_pool(); -+ CoroutinePoolBatch *batch; -+ CoroutinePoolBatch *tmp; -+ -+ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) { -+ QSLIST_REMOVE_HEAD(local_pool, next); -+ coroutine_pool_batch_delete(batch); -+ } -+} -+ -+/* Ensure the atexit notifier is registered */ -+static void local_pool_cleanup_init_once(void) -+{ -+ Notifier *notifier = get_ptr_local_pool_cleanup_notifier(); -+ if (!notifier->notify) { -+ notifier->notify = local_pool_cleanup; -+ qemu_thread_atexit_add(notifier); -+ } -+} -+ -+/* Helper to get the next unused coroutine from the local pool */ -+static Coroutine *coroutine_pool_get_local(void) -+{ -+ CoroutinePool *local_pool = get_ptr_local_pool(); -+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); -+ Coroutine *co; -+ -+ if (unlikely(!batch)) { -+ return NULL; -+ } -+ -+ co = QSLIST_FIRST(&batch->list); -+ QSLIST_REMOVE_HEAD(&batch->list, pool_next); -+ batch->size--; -+ -+ if (batch->size == 0) { -+ QSLIST_REMOVE_HEAD(local_pool, next); -+ coroutine_pool_batch_delete(batch); -+ } -+ return co; -+} -+ -+/* Get the next batch from the global pool */ -+static void coroutine_pool_refill_local(void) -+{ -+ CoroutinePool *local_pool = get_ptr_local_pool(); -+ CoroutinePoolBatch *batch; -+ -+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { -+ batch = QSLIST_FIRST(&global_pool); -+ -+ if (batch) { -+ QSLIST_REMOVE_HEAD(&global_pool, next); -+ global_pool_size -= batch->size; -+ } -+ } -+ -+ if (batch) { -+ QSLIST_INSERT_HEAD(local_pool, batch, next); -+ local_pool_cleanup_init_once(); -+ } -+} -+ -+/* Add a batch of coroutines to the global pool */ -+static void coroutine_pool_put_global(CoroutinePoolBatch *batch) -+{ -+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { -+ unsigned int max = MIN(global_pool_max_size, -+ global_pool_hard_max_size); -+ -+ if (global_pool_size < max) { -+ QSLIST_INSERT_HEAD(&global_pool, batch, next); -+ -+ /* Overshooting the max pool size is allowed */ -+ global_pool_size += batch->size; -+ return; -+ } -+ } -+ -+ 
/* The global pool was full, so throw away this batch */ -+ coroutine_pool_batch_delete(batch); -+} -+ -+/* Get the next unused coroutine from the pool or return NULL */ -+static Coroutine *coroutine_pool_get(void) -+{ -+ Coroutine *co; -+ -+ co = coroutine_pool_get_local(); -+ if (!co) { -+ coroutine_pool_refill_local(); -+ co = coroutine_pool_get_local(); -+ } -+ return co; -+} -+ -+static void coroutine_pool_put(Coroutine *co) -+{ -+ CoroutinePool *local_pool = get_ptr_local_pool(); -+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); -+ -+ if (unlikely(!batch)) { -+ batch = coroutine_pool_batch_new(); -+ QSLIST_INSERT_HEAD(local_pool, batch, next); -+ local_pool_cleanup_init_once(); -+ } -+ -+ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) { -+ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next); -+ -+ /* Is the local pool full? */ -+ if (next) { -+ QSLIST_REMOVE_HEAD(local_pool, next); -+ coroutine_pool_put_global(batch); -+ } -+ -+ batch = coroutine_pool_batch_new(); -+ QSLIST_INSERT_HEAD(local_pool, batch, next); -+ } -+ -+ QSLIST_INSERT_HEAD(&batch->list, co, pool_next); -+ batch->size++; - } - - Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) -@@ -58,31 +219,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) - Coroutine *co = NULL; - - if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { -- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); -- -- co = QSLIST_FIRST(alloc_pool); -- if (!co) { -- if (release_pool_size > POOL_MIN_BATCH_SIZE) { -- /* Slow path; a good place to register the destructor, too. */ -- Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); -- if (!notifier->notify) { -- notifier->notify = coroutine_pool_cleanup; -- qemu_thread_atexit_add(notifier); -- } -- -- /* This is not exact; there could be a little skew between -- * release_pool_size and the actual size of release_pool. But -- * it is just a heuristic, it does not need to be perfect. 
-- */ -- set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); -- QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); -- co = QSLIST_FIRST(alloc_pool); -- } -- } -- if (co) { -- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); -- set_alloc_pool_size(get_alloc_pool_size() - 1); -- } -+ co = coroutine_pool_get(); - } - - if (!co) { -@@ -100,19 +237,10 @@ static void coroutine_delete(Coroutine *co) - co->caller = NULL; - - if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { -- if (release_pool_size < qatomic_read(&pool_max_size) * 2) { -- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); -- qatomic_inc(&release_pool_size); -- return; -- } -- if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { -- QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); -- set_alloc_pool_size(get_alloc_pool_size() + 1); -- return; -- } -+ coroutine_pool_put(co); -+ } else { -+ qemu_coroutine_delete(co); - } -- -- qemu_coroutine_delete(co); - } - - void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) -@@ -223,10 +351,46 @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co) - - void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) - { -- qatomic_add(&pool_max_size, additional_pool_size); -+ QEMU_LOCK_GUARD(&global_pool_lock); -+ global_pool_max_size += additional_pool_size; - } - - void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) - { -- qatomic_sub(&pool_max_size, removing_pool_size); -+ QEMU_LOCK_GUARD(&global_pool_lock); -+ global_pool_max_size -= removing_pool_size; -+} -+ -+static unsigned int get_global_pool_hard_max_size(void) -+{ -+#ifdef __linux__ -+ g_autofree char *contents = NULL; -+ int max_map_count; -+ -+ /* -+ * Linux processes can have up to max_map_count virtual memory areas -+ * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We -+ * must limit the coroutine pool to a safe size to avoid running out of -+ * VMAs. 
-+ */ -+ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL, -+ NULL) && -+ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { -+ /* -+ * This is a conservative upper bound that avoids exceeding -+ * max_map_count. Leave half for non-coroutine users like library -+ * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so -+ * halve the amount again. -+ */ -+ return max_map_count / 4; -+ } -+#endif -+ -+ return UINT_MAX; -+} -+ -+static void __attribute__((constructor)) qemu_coroutine_init(void) -+{ -+ qemu_mutex_init(&global_pool_lock); -+ global_pool_hard_max_size = get_global_pool_hard_max_size(); - } --- -2.39.3 - diff --git a/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch b/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch deleted file mode 100644 index cdbb666..0000000 --- a/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 0aa65dc3acba481f7064df936ab49e3bceb1d5bd Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Mar 2024 14:12:32 -0400 -Subject: [PATCH 2/2] coroutine: reserve 5,000 mappings -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 234: coroutine: cap per-thread local pool size -RH-Jira: RHEL-28947 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [2/2] 78560c2b947471111cc16c313d6f38db42860a1c (stefanha/centos-stream-qemu-kvm) - -Daniel P. Berrangé pointed out that the coroutine -pool size heuristic is very conservative. Instead of halving -max_map_count, he suggested reserving 5,000 mappings for non-coroutine -users based on observations of guests he has access to. - -Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size") -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. 
Berrangé -Message-id: 20240320181232.1464819-1-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9352f80cd926fe2dde7c89b93ee33bb0356ff40e) -Signed-off-by: Stefan Hajnoczi ---- - util/qemu-coroutine.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index 2790959eaf..eb4eebefdf 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -377,12 +377,17 @@ static unsigned int get_global_pool_hard_max_size(void) - NULL) && - qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { - /* -- * This is a conservative upper bound that avoids exceeding -- * max_map_count. Leave half for non-coroutine users like library -- * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so -- * halve the amount again. -+ * This is an upper bound that avoids exceeding max_map_count. Leave a -+ * fixed amount for non-coroutine users like library dependencies, -+ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the -+ * remaining amount. 
- */ -- return max_map_count / 4; -+ if (max_map_count > 5000) { -+ return (max_map_count - 5000) / 2; -+ } else { -+ /* Disable the global pool but threads still have local pools */ -+ return 0; -+ } - } - #endif - --- -2.39.3 - diff --git a/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch new file mode 100644 index 0000000..b5fcef5 --- /dev/null +++ b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch @@ -0,0 +1,228 @@ +From 117486e0820f135f191e19f8ebb8838a98b121c6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:51 -0400 +Subject: [PATCH 5/5] crypto/block: drop qcrypto_block_open() n_threads + argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 68290935b174b1f2b76aa857a926da9011e54abe (stefanha/centos-stream-qemu-kvm) + +The n_threads argument is no longer used since the previous commit. +Remove it. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ab0f063e58ed9224237d69c4211ca83335164c4) +Signed-off-by: Stefan Hajnoczi +--- + block/crypto.c | 1 - + block/qcow.c | 2 +- + block/qcow2.c | 5 ++--- + crypto/block-luks.c | 1 - + crypto/block-qcow.c | 6 ++---- + crypto/block.c | 3 +-- + crypto/blockpriv.h | 1 - + include/crypto/block.h | 2 -- + tests/unit/test-crypto-block.c | 4 ---- + 9 files changed, 6 insertions(+), 19 deletions(-) + +diff --git a/block/crypto.c b/block/crypto.c +index 21eed909c1..4eed3ffa6a 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -363,7 +363,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, + block_crypto_read_func, + bs, + cflags, +- 1, + errp); + + if (!crypto->block) { +diff --git a/block/qcow.c b/block/qcow.c +index ca8e1d5ec8..c2f89db055 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -211,7 +211,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", +- NULL, NULL, cflags, 1, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/block/qcow2.c b/block/qcow2.c +index 0e8b2f7518..0ebd455dc8 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -321,7 +321,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + qcow2_crypto_hdr_read_func, +- bs, cflags, QCOW2_MAX_THREADS, errp); ++ bs, cflags, errp); + if (!s->crypto) { + return -EINVAL; + } +@@ -1707,8 +1707,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", +- NULL, NULL, cflags, +- QCOW2_MAX_THREADS, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3357852c0a..5b777c15d3 100644 
+--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1189,7 +1189,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlockLUKS *luks = NULL; +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 02305058e3..42e9556e42 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -44,7 +44,6 @@ qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED, + static int + qcrypto_block_qcow_init(QCryptoBlock *block, + const char *keysecret, +- size_t n_threads, + Error **errp) + { + char *password; +@@ -100,7 +99,6 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) { +@@ -115,7 +113,7 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + return -1; + } + return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, +- n_threads, errp); ++ errp); + } + } + +@@ -135,7 +133,7 @@ qcrypto_block_qcow_create(QCryptoBlock *block, + return -1; + } + /* QCow2 has no special header, since everything is hardwired */ +- return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp); ++ return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp); + } + + +diff --git a/crypto/block.c b/crypto/block.c +index ba6d1cebc7..3bcc4270c3 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -53,7 +53,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); +@@ -73,7 +72,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + block->driver = qcrypto_block_drivers[options->format]; + + if (block->driver->open(block, options, optprefix, +- readfunc, opaque, flags, 
n_threads, errp) < 0) ++ readfunc, opaque, flags, errp) < 0) + { + g_free(block); + return NULL; +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 4bf6043d5d..b8f77cb5eb 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -59,7 +59,6 @@ struct QCryptoBlockDriver { + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + int (*create)(QCryptoBlock *block, +diff --git a/include/crypto/block.h b/include/crypto/block.h +index 92e823c9f2..5b5d039800 100644 +--- a/include/crypto/block.h ++++ b/include/crypto/block.h +@@ -76,7 +76,6 @@ typedef enum { + * @readfunc: callback for reading data from the volume + * @opaque: data to pass to @readfunc + * @flags: bitmask of QCryptoBlockOpenFlags values +- * @n_threads: allow concurrent I/O from up to @n_threads threads + * @errp: pointer to a NULL-initialized error object + * + * Create a new block encryption object for an existing +@@ -113,7 +112,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + typedef enum { +diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c +index 6cfc817a92..42cfab6067 100644 +--- a/tests/unit/test-crypto-block.c ++++ b/tests/unit/test-crypto-block.c +@@ -303,7 +303,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + NULL); + g_assert(blk == NULL); + +@@ -312,7 +311,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + QCRYPTO_BLOCK_OPEN_NO_IO, +- 1, + &error_abort); + + g_assert(qcrypto_block_get_cipher(blk) == NULL); +@@ -327,7 +325,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + &error_abort); + g_assert(blk); + +@@ -384,7 +381,6 @@ test_luks_bad_header(gconstpointer data) + test_block_read_func, + &buf, + 0, +- 1, + &err); + g_assert(!blk); 
+ g_assert(err); +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch deleted file mode 100644 index 735f2a3..0000000 --- a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch +++ /dev/null @@ -1,75 +0,0 @@ -From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:59 -0500 -Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm) - -Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs -dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb() -to avoid a race with scsi_device_purge_requests() running in the main -loop thread. - -The SCSI code no longer calls dma_aio_cancel() from the main loop thread -while I/O is running in the IOThread AioContext. Therefore it is no -longer necessary to take this lock to protect DMAAIOCB fields. The -->cb() function also does not require the lock because blk_aio_*() and -friends do not need the AioContext lock. - -Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't -rely on it taking the AioContext lock, so this change is safe. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231204164259.1515217-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - system/dma-helpers.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/system/dma-helpers.c b/system/dma-helpers.c -index 36211acc7e..528117f256 100644 ---- a/system/dma-helpers.c -+++ b/system/dma-helpers.c -@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret) - - trace_dma_blk_cb(dbs, ret); - -- aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); -- goto out; -+ return; - } - dma_blk_unmap(dbs); - -@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret) - trace_dma_map_wait(dbs); - dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); -- goto out; -+ return; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { -@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret) - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); - assert(dbs->acb); --out: -- aio_context_release(ctx); - } - - static void dma_aio_cancel(BlockAIOCB *acb) --- -2.39.3 - diff --git a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch deleted file mode 100644 index dbe48d7..0000000 --- a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch +++ /dev/null @@ -1,228 +0,0 @@ -From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:26 +0800 -Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 
-RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm) - -Suggested-by: Cédric Le Goater -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 1 + - docs/devel/index-internals.rst | 1 + - docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++ - 3 files changed, 168 insertions(+) - create mode 100644 docs/devel/vfio-iommufd.rst - -diff --git a/MAINTAINERS b/MAINTAINERS -index ca70bb4e64..0ddb20a35f 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2176,6 +2176,7 @@ F: backends/iommufd.c - F: include/sysemu/iommufd.h - F: include/qemu/chardev_open.h - F: util/chardev_open.c -+F: docs/devel/vfio-iommufd.rst - - vhost - M: Michael S. Tsirkin -diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst -index 6f81df92bc..3def4a138b 100644 ---- a/docs/devel/index-internals.rst -+++ b/docs/devel/index-internals.rst -@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them. - s390-dasd-ipl - tracing - vfio-migration -+ vfio-iommufd - writing-monitor-commands - virtio-backends -diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst -new file mode 100644 -index 0000000000..3d1c11f175 ---- /dev/null -+++ b/docs/devel/vfio-iommufd.rst -@@ -0,0 +1,166 @@ -+=============================== -+IOMMUFD BACKEND usage with VFIO -+=============================== -+ -+(Same meaning for backend/container/BE) -+ -+With the introduction of iommufd, the Linux kernel provides a generic -+interface for user space drivers to propagate their DMA mappings to kernel -+for assigned devices. While the legacy kernel interface is group-centric, -+the new iommufd interface is device-centric, relying on device fd and iommufd. 
-+ -+To support both interfaces in the QEMU VFIO device, introduce a base container -+to abstract the common part of VFIO legacy and iommufd container. So that the -+generic VFIO code can use either container. -+ -+The base container implements generic functions such as memory_listener and -+address space management whereas the derived container implements callbacks -+specific to either legacy or iommufd. Each container has its own way to setup -+secure context and dma management interface. The below diagram shows how it -+looks like with both containers. -+ -+:: -+ -+ VFIO AddressSpace/Memory -+ +-------+ +----------+ +-----+ +-----+ -+ | pci | | platform | | ap | | ccw | -+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+ -+ | | | | | AddressSpace | -+ | | | | +------------+---------+ -+ +---V-----------V-----------V--------V----+ / -+ | VFIOAddressSpace | <------------+ -+ | | | MemoryListener -+ | VFIOContainerBase list | -+ +-------+----------------------------+----+ -+ | | -+ | | -+ +-------V------+ +--------V----------+ -+ | iommufd | | vfio legacy | -+ | container | | container | -+ +-------+------+ +--------+----------+ -+ | | -+ | /dev/iommu | /dev/vfio/vfio -+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id -+ Userspace | | -+ ============+============================+=========================== -+ Kernel | device fd | -+ +---------------+ | group/container fd -+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU) -+ | ATTACH_IOAS) | | device fd -+ | | | -+ | +-------V------------V-----------------+ -+ iommufd | | vfio | -+ (map/unmap | +---------+--------------------+-------+ -+ ioas_copy) | | | map/unmap -+ | | | -+ +------V------+ +-----V------+ +------V--------+ -+ | iommfd core | | device | | vfio iommu | -+ +-------------+ +------------+ +---------------+ -+ -+* Secure Context setup -+ -+ - iommufd BE: uses device fd and iommufd to setup secure context -+ (bind_iommufd, attach_ioas) -+ - vfio legacy BE: uses group fd and container fd to 
setup secure context -+ (set_container, set_iommu) -+ -+* Device access -+ -+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX`` -+ - vfio legacy BE: device fd is retrieved from group fd ioctl -+ -+* DMA Mapping flow -+ -+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener -+ 2. VFIO populates DMA map/unmap via the container BEs -+ * iommufd BE: uses iommufd -+ * vfio legacy BE: uses container fd -+ -+Example configuration -+===================== -+ -+Step 1: configure the host device -+--------------------------------- -+ -+It's exactly same as the VFIO device with legacy VFIO container. -+ -+Step 2: configure QEMU -+---------------------- -+ -+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd -+object (compiled in with the ``CONFIG_IOMMUFD`` option). -+ -+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must -+be linked with an iommufd object. It gets a new optional property -+named iommufd which allows to pass an iommufd object. Take ``vfio-pci`` -+device for example: -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0 -+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 -+ -+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a -+management layer. In such a case the fd is passed, the fd supports a -+string naming the fd or a number, for example: -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0,fd=22 -+ -device vfio-pci,iommufd=iommufd0,fd=23 -+ -+If the ``fd`` property is not passed, the fd is opened by QEMU. -+ -+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd -+is not used and the user gets the behavior based on the legacy VFIO -+container: -+ -+.. code-block:: bash -+ -+ -device vfio-pci,host=0000:02:00.0 -+ -+Supported platform -+================== -+ -+Supports x86, ARM and s390x currently. 
-+ -+Caveats -+======= -+ -+Dirty page sync -+--------------- -+ -+Dirty page sync with iommufd backend is unsupported yet, live migration is -+disabled by default. But it can be force enabled like below, low efficient -+though. -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0 -+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on -+ -+P2P DMA -+------- -+ -+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI -+BAR region yet. Below warning shows for assigned PCI device, it's not a bug. -+ -+.. code-block:: none -+ -+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR? -+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address) -+ -+FD passing with mdev -+-------------------- -+ -+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev. -+If FD passing is used, there is no way to know that and the mdev is treated -+like a real PCI device. There is an error as below if user wants to enable -+RAM discarding for mdev. -+ -+.. code-block:: none -+ -+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices -+ -+``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend -+devices are always mdev and RAM discarding is force enabled. 
--- -2.39.3 - diff --git a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch deleted file mode 100644 index 80adc69..0000000 --- a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch +++ /dev/null @@ -1,98 +0,0 @@ -From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:08 -0500 -Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm) - -Encourage the use of locking primitives and stop mentioning the -AioContext lock since it is being removed. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-12-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - docs/devel/multiple-iothreads.txt | 47 +++++++++++-------------------- - 1 file changed, 16 insertions(+), 31 deletions(-) - -diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt -index a3e949f6b3..4865196bde 100644 ---- a/docs/devel/multiple-iothreads.txt -+++ b/docs/devel/multiple-iothreads.txt -@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in. - - How to synchronize with an IOThread - ----------------------------------- --AioContext is not thread-safe so some rules must be followed when using file --descriptors, event notifiers, timers, or BHs across threads: -+Variables that can be accessed by multiple threads require some form of -+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc. - --1. AioContext functions can always be called safely. They handle their --own locking internally. -- --2. 
Other threads wishing to access the AioContext must use --aio_context_acquire()/aio_context_release() for mutual exclusion. Once the --context is acquired no other thread can access it or run event loop iterations --in this AioContext. -- --Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls. --Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro --used in the block layer and can lead to hangs. -- --There is currently no lock ordering rule if a thread needs to acquire multiple --AioContexts simultaneously. Therefore, it is only safe for code holding the --QEMU global mutex to acquire other AioContexts. -+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(), -+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger -+activity in an IOThread. - - Side note: the best way to schedule a function call across threads is to call --aio_bh_schedule_oneshot(). No acquire/release or locking is needed. -+aio_bh_schedule_oneshot(). -+ -+The main loop thread can wait synchronously for a condition using -+AIO_WAIT_WHILE(). - - AioContext and the block layer - ------------------------------ -@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using - old APIs that implicitly use the main loop. See the "How to program for - IOThreads" above for information on how to do that. - --If main loop code such as a QMP function wishes to access a BlockDriverState --it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure --that callbacks in the IOThread do not run in parallel. -- - Code running in the monitor typically needs to ensure that past - requests from the guest are completed. When a block device is running - in an IOThread, the IOThread can also process requests from the guest - (via ioeventfd). To achieve both objects, wrap the code between - bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained --section". 
The functions must be called between aio_context_acquire() --and aio_context_release(). You can freely release and re-acquire the --AioContext within a drained section. -- --Long-running jobs (usually in the form of coroutines) are best scheduled in --the BlockDriverState's AioContext to avoid the need to acquire/release around --each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, --or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, --can be used to get a notification whenever bdrv_try_change_aio_context() moves a -+section". -+ -+Long-running jobs (usually in the form of coroutines) are often scheduled in -+the BlockDriverState's AioContext. The functions -+bdrv_add/remove_aio_context_notifier, or alternatively -+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to -+get a notification whenever bdrv_try_change_aio_context() moves a - BlockDriverState to a different AioContext. --- -2.39.3 - diff --git a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch deleted file mode 100644 index 2fff9ba..0000000 --- a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch +++ /dev/null @@ -1,1190 +0,0 @@ -From 57d96b5774fab588c6bb6812ef8ef281ffe018d7 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:02 -0500 -Subject: [PATCH 085/101] graph-lock: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [16/26] 9575a8b834aaaa03abaf869e96f0808172e87824 (kmwolf/centos-qemu-kvm) - -Stop acquiring/releasing the AioContext lock in -bdrv_graph_wrlock()/bdrv_graph_unlock() since the lock no longer has any -effect. - -The distinction between bdrv_graph_wrunlock() and -bdrv_graph_wrunlock_ctx() becomes meaningless and they can be collapsed -into one function. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-6-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - block.c | 50 +++++++++++++++--------------- - block/backup.c | 4 +-- - block/blklogwrites.c | 8 ++--- - block/blkverify.c | 4 +-- - block/block-backend.c | 11 +++---- - block/commit.c | 16 +++++----- - block/graph-lock.c | 44 ++------------------------ - block/mirror.c | 22 ++++++------- - block/qcow2.c | 4 +-- - block/quorum.c | 8 ++--- - block/replication.c | 14 ++++----- - block/snapshot.c | 4 +-- - block/stream.c | 12 +++---- - block/vmdk.c | 20 ++++++------ - blockdev.c | 8 ++--- - blockjob.c | 12 +++---- - include/block/graph-lock.h | 21 ++----------- - scripts/block-coroutine-wrapper.py | 4 +-- - tests/unit/test-bdrv-drain.c | 40 ++++++++++++------------ - tests/unit/test-bdrv-graph-mod.c | 20 ++++++------ - 20 files changed, 133 insertions(+), 193 deletions(-) - -diff --git a/block.c b/block.c -index bfb0861ec6..25e1ebc606 100644 ---- a/block.c -+++ b/block.c -@@ -1708,12 +1708,12 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - open_failed: - bs->drv = NULL; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - assert(!bs->file); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_free(bs->opaque); - bs->opaque = NULL; -@@ -3575,9 +3575,9 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - - bdrv_ref(drain_bs); - bdrv_drained_begin(drain_bs); -- bdrv_graph_wrlock(backing_hd); -+ bdrv_graph_wrlock(); - ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); -- bdrv_graph_wrunlock(backing_hd); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(drain_bs); - bdrv_unref(drain_bs); - -@@ -3790,13 +3790,13 @@ BdrvChild *bdrv_open_child(const char *filename, - return NULL; - } - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - ctx = 
bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); - aio_context_release(ctx); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - return child; - } -@@ -4650,9 +4650,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - aio_context_release(ctx); - } - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tran_commit(tran); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { - BlockDriverState *bs = bs_entry->state.bs; -@@ -4669,9 +4669,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - goto cleanup; - - abort: -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tran_abort(tran); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (bs_entry->prepared) { -@@ -4852,12 +4852,12 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - } - - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(new_child_bs); -+ bdrv_graph_wrlock(); - - ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, - tran, errp); - -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - - if (old_ctx != ctx) { - aio_context_release(ctx); -@@ -5209,14 +5209,14 @@ static void bdrv_close(BlockDriverState *bs) - bs->drv = NULL; - } - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_unref_child(bs, child); - } - - assert(!bs->backing); - assert(!bs->file); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - g_free(bs->opaque); - bs->opaque = NULL; -@@ -5509,9 +5509,9 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - bdrv_graph_rdunlock_main_loop(); - - bdrv_drained_begin(child_bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - ret = bdrv_replace_node_common(bs, child_bs, true, true, errp); -- bdrv_graph_wrunlock(bs); 
-+ bdrv_graph_wrunlock(); - bdrv_drained_end(child_bs); - - return ret; -@@ -5561,7 +5561,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - aio_context_acquire(old_context); - new_context = NULL; - -- bdrv_graph_wrlock(bs_top); -+ bdrv_graph_wrlock(); - - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), -@@ -5593,7 +5593,7 @@ out: - tran_finalize(tran, ret); - - bdrv_refresh_limits(bs_top, NULL, NULL); -- bdrv_graph_wrunlock(bs_top); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); -@@ -5620,7 +5620,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - bdrv_ref(old_bs); - bdrv_drained_begin(old_bs); - bdrv_drained_begin(new_bs); -- bdrv_graph_wrlock(new_bs); -+ bdrv_graph_wrlock(); - - bdrv_replace_child_tran(child, new_bs, tran); - -@@ -5631,7 +5631,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - - tran_finalize(tran, ret); - -- bdrv_graph_wrunlock(new_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(old_bs); - bdrv_drained_end(new_bs); - bdrv_unref(old_bs); -@@ -5718,9 +5718,9 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - bdrv_ref(bs); - bdrv_drained_begin(bs); - bdrv_drained_begin(new_node_bs); -- bdrv_graph_wrlock(new_node_bs); -+ bdrv_graph_wrlock(); - ret = bdrv_replace_node(bs, new_node_bs, errp); -- bdrv_graph_wrunlock(new_node_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(new_node_bs); - bdrv_drained_end(bs); - bdrv_unref(bs); -@@ -5975,7 +5975,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - - bdrv_ref(top); - bdrv_drained_begin(base); -- bdrv_graph_wrlock(base); -+ bdrv_graph_wrlock(); - - if (!top->drv || !base->drv) { - goto exit_wrlock; -@@ -6015,7 +6015,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - * That's a FIXME. 
- */ - bdrv_replace_node_common(top, base, false, false, &local_err); -- bdrv_graph_wrunlock(base); -+ bdrv_graph_wrunlock(); - - if (local_err) { - error_report_err(local_err); -@@ -6052,7 +6052,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - goto exit; - - exit_wrlock: -- bdrv_graph_wrunlock(base); -+ bdrv_graph_wrunlock(); - exit: - bdrv_drained_end(base); - bdrv_unref(top); -diff --git a/block/backup.c b/block/backup.c -index 8aae5836d7..ec29d6b810 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -496,10 +496,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - block_copy_set_speed(bcs, speed); - - /* Required permissions are taken by copy-before-write filter target */ -- bdrv_graph_wrlock(target); -+ bdrv_graph_wrlock(); - block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, - &error_abort); -- bdrv_graph_wrunlock(target); -+ bdrv_graph_wrunlock(); - - return &job->common; - -diff --git a/block/blklogwrites.c b/block/blklogwrites.c -index 3678f6cf42..7207b2e757 100644 ---- a/block/blklogwrites.c -+++ b/block/blklogwrites.c -@@ -251,9 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, - ret = 0; - fail_log: - if (ret < 0) { -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - s->log_file = NULL; - } - fail: -@@ -265,10 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs) - { - BDRVBlkLogWritesState *s = bs->opaque; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); - s->log_file = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int64_t coroutine_fn GRAPH_RDLOCK -diff --git a/block/blkverify.c b/block/blkverify.c -index 9b17c46644..ec45d8335e 100644 ---- a/block/blkverify.c -+++ b/block/blkverify.c -@@ -151,10 +151,10 @@ static void blkverify_close(BlockDriverState *bs) - { 
- BDRVBlkverifyState *s = bs->opaque; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->test_file); - s->test_file = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int64_t coroutine_fn GRAPH_RDLOCK -diff --git a/block/block-backend.c b/block/block-backend.c -index ec21148806..abac4e0235 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -889,7 +889,6 @@ void blk_remove_bs(BlockBackend *blk) - { - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - BdrvChild *root; -- AioContext *ctx; - - GLOBAL_STATE_CODE(); - -@@ -919,10 +918,9 @@ void blk_remove_bs(BlockBackend *blk) - root = blk->root; - blk->root = NULL; - -- ctx = bdrv_get_aio_context(root->bs); -- bdrv_graph_wrlock(root->bs); -+ bdrv_graph_wrlock(); - bdrv_root_unref_child(root); -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - } - - /* -@@ -933,16 +931,15 @@ void blk_remove_bs(BlockBackend *blk) - int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - { - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; -- AioContext *ctx = bdrv_get_aio_context(bs); - - GLOBAL_STATE_CODE(); - bdrv_ref(bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->perm, blk->shared_perm, - blk, errp); -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - if (blk->root == NULL) { - return -EPERM; - } -diff --git a/block/commit.c b/block/commit.c -index 69cc75be0c..1dd7a65ffb 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -100,9 +100,9 @@ static void commit_abort(Job *job) - bdrv_graph_rdunlock_main_loop(); - - bdrv_drained_begin(commit_top_backing_bs); -- bdrv_graph_wrlock(commit_top_backing_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort); -- bdrv_graph_wrunlock(commit_top_backing_bs); -+ bdrv_graph_wrunlock(); - 
bdrv_drained_end(commit_top_backing_bs); - - bdrv_unref(s->commit_top_bs); -@@ -339,7 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, - * this is the responsibility of the interface (i.e. whoever calls - * commit_start()). - */ -- bdrv_graph_wrlock(top); -+ bdrv_graph_wrlock(); - s->base_overlay = bdrv_find_overlay(top, base); - assert(s->base_overlay); - -@@ -370,19 +370,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - iter_shared_perms, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - goto fail; - } - s->chain_frozen = true; - - ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - - if (ret < 0) { - goto fail; -@@ -434,9 +434,9 @@ fail: - * otherwise this would fail because of lack of permissions. */ - if (commit_top_bs) { - bdrv_drained_begin(top); -- bdrv_graph_wrlock(top); -+ bdrv_graph_wrlock(); - bdrv_replace_node(commit_top_bs, top, &error_abort); -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(top); - } - } -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 079e878d9b..c81162b147 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -106,27 +106,12 @@ static uint32_t reader_count(void) - return rd; - } - --void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) -+void no_coroutine_fn bdrv_graph_wrlock(void) - { -- AioContext *ctx = NULL; -- - GLOBAL_STATE_CODE(); - assert(!qatomic_read(&has_writer)); - assert(!qemu_in_coroutine()); - -- /* -- * Release only non-mainloop AioContext. The mainloop often relies on the -- * BQL and doesn't lock the main AioContext before doing things. 
-- */ -- if (bs) { -- ctx = bdrv_get_aio_context(bs); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } else { -- ctx = NULL; -- } -- } -- - /* Make sure that constantly arriving new I/O doesn't cause starvation */ - bdrv_drain_all_begin_nopoll(); - -@@ -155,27 +140,13 @@ void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) - } while (reader_count() >= 1); - - bdrv_drain_all_end(); -- -- if (ctx) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - } - --void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) -+void no_coroutine_fn bdrv_graph_wrunlock(void) - { - GLOBAL_STATE_CODE(); - assert(qatomic_read(&has_writer)); - -- /* -- * Release only non-mainloop AioContext. The mainloop often relies on the -- * BQL and doesn't lock the main AioContext before doing things. -- */ -- if (ctx && ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } else { -- ctx = NULL; -- } -- - WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { - /* - * No need for memory barriers, this works in pair with -@@ -197,17 +168,6 @@ void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) - * progress. - */ - aio_bh_poll(qemu_get_aio_context()); -- -- if (ctx) { -- aio_context_acquire(ctx); -- } --} -- --void no_coroutine_fn bdrv_graph_wrunlock(BlockDriverState *bs) --{ -- AioContext *ctx = bs ? bdrv_get_aio_context(bs) : NULL; -- -- bdrv_graph_wrunlock_ctx(ctx); - } - - void coroutine_fn bdrv_graph_co_rdlock(void) -diff --git a/block/mirror.c b/block/mirror.c -index cd9d3ad4a8..51f9e2f17c 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -764,7 +764,7 @@ static int mirror_exit_common(Job *job) - * check for an op blocker on @to_replace, and we have our own - * there. 
- */ -- bdrv_graph_wrlock(target_bs); -+ bdrv_graph_wrlock(); - if (bdrv_recurse_can_replace(src, to_replace)) { - bdrv_replace_node(to_replace, target_bs, &local_err); - } else { -@@ -773,7 +773,7 @@ static int mirror_exit_common(Job *job) - "would not lead to an abrupt change of visible data", - to_replace->node_name, target_bs->node_name); - } -- bdrv_graph_wrunlock(target_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(to_replace); - if (local_err) { - error_report_err(local_err); -@@ -796,9 +796,9 @@ static int mirror_exit_common(Job *job) - * valid. - */ - block_job_remove_all_bdrv(bjob); -- bdrv_graph_wrlock(mirror_top_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); -- bdrv_graph_wrunlock(mirror_top_bs); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(target_bs); - bdrv_unref(target_bs); -@@ -1914,13 +1914,13 @@ static BlockJob *mirror_start_job( - */ - bdrv_disable_dirty_bitmap(s->dirty_bitmap); - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - ret = block_job_add_bdrv(&s->common, "source", bs, 0, - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | - BLK_PERM_CONSISTENT_READ, - errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - -@@ -1965,17 +1965,17 @@ static BlockJob *mirror_start_job( - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - iter_shared_perms, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - QTAILQ_INIT(&s->ops_in_flight); - -@@ -2001,12 +2001,12 @@ fail: - - bs_opaque->stop = true; - bdrv_drained_begin(bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - assert(mirror_top_bs->backing->bs == bs); - bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, - 
&error_abort); - bdrv_replace_node(mirror_top_bs, bs, &error_abort); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(bs); - - bdrv_unref(mirror_top_bs); -diff --git a/block/qcow2.c b/block/qcow2.c -index 7968735346..d91b7b91d3 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2813,9 +2813,9 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) - if (close_data_file && has_data_file(bs)) { - GLOBAL_STATE_CODE(); - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->data_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - s->data_file = NULL; - bdrv_graph_rdlock_main_loop(); - } -diff --git a/block/quorum.c b/block/quorum.c -index 505b8b3e18..db8fe891c4 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1037,14 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, - - close_exit: - /* cleanup on error */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - if (!opened[i]) { - continue; - } - bdrv_unref_child(bs, s->children[i]); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - g_free(s->children); - g_free(opened); - exit: -@@ -1057,11 +1057,11 @@ static void quorum_close(BlockDriverState *bs) - BDRVQuorumState *s = bs->opaque; - int i; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - bdrv_unref_child(bs, s->children[i]); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_free(s->children); - } -diff --git a/block/replication.c b/block/replication.c -index 5ded5f1ca9..424b537ff7 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -560,7 +560,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - return; - } - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - - bdrv_ref(hidden_disk->bs); - s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", -@@ 
-568,7 +568,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - &local_err); - if (local_err) { - error_propagate(errp, local_err); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - aio_context_release(aio_context); - return; - } -@@ -579,7 +579,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - BDRV_CHILD_DATA, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - aio_context_release(aio_context); - return; - } -@@ -592,7 +592,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!top_bs || !bdrv_is_root_node(top_bs) || - !check_top_bs(top_bs, bs)) { - error_setg(errp, "No top_bs or it is invalid"); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - reopen_backing_file(bs, false, NULL); - aio_context_release(aio_context); - return; -@@ -600,7 +600,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - bdrv_op_block_all(top_bs, s->blocker); - bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); - -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - s->backup_job = backup_job_create( - NULL, s->secondary_disk->bs, s->hidden_disk->bs, -@@ -691,12 +691,12 @@ static void replication_done(void *opaque, int ret) - if (ret == 0) { - s->stage = BLOCK_REPLICATION_DONE; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->secondary_disk); - s->secondary_disk = NULL; - bdrv_unref_child(bs, s->hidden_disk); - s->hidden_disk = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - s->error = 0; - } else { -diff --git a/block/snapshot.c b/block/snapshot.c -index ec8cf4810b..e486d3e205 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -290,9 +290,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs, - } - - /* .bdrv_open() will re-attach it */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, fallback); -- 
bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); - open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); -diff --git a/block/stream.c b/block/stream.c -index 01fe7c0f16..048c2d282f 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -99,9 +99,9 @@ static int stream_prepare(Job *job) - } - } - -- bdrv_graph_wrlock(s->target_bs); -+ bdrv_graph_wrlock(); - bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); -- bdrv_graph_wrunlock(s->target_bs); -+ bdrv_graph_wrunlock(); - - /* - * This call will do I/O, so the graph can change again from here on. -@@ -366,10 +366,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, - * already have our own plans. Also don't allow resize as the image size is - * queried only at the job start and then cached. - */ -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - if (block_job_add_bdrv(&s->common, "active node", bs, 0, - basic_flags | BLK_PERM_WRITE, errp)) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - -@@ -389,11 +389,11 @@ void stream_start(const char *job_id, BlockDriverState *bs, - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - basic_flags, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - s->base_overlay = base_overlay; - s->above_base = above_base; -diff --git a/block/vmdk.c b/block/vmdk.c -index d6971c7067..bf78e12383 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -272,7 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - BDRVVmdkState *s = bs->opaque; - VmdkExtent *e; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; - g_free(e->l1_table); -@@ -283,7 +283,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - bdrv_unref_child(bs, e->file); - } - } -- bdrv_graph_wrunlock(NULL); -+ 
bdrv_graph_wrunlock(); - - g_free(s->extents); - } -@@ -1247,9 +1247,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - 0, 0, 0, 0, 0, &extent, errp); - if (ret < 0) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1266,9 +1266,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - g_free(buf); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1277,9 +1277,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1287,9 +1287,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - } else { - error_setg(errp, "Unsupported extent type '%s'", type); - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - ret = -ENOTSUP; - goto out; -diff --git a/blockdev.c b/blockdev.c -index c91f49e7b6..9e1381169d 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1611,9 +1611,9 @@ static void external_snapshot_abort(void *opaque) - } - - bdrv_drained_begin(state->new_bs); -- bdrv_graph_wrlock(state->old_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); -- bdrv_graph_wrunlock(state->old_bs); -+ 
bdrv_graph_wrunlock(); - bdrv_drained_end(state->new_bs); - - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ -@@ -3657,7 +3657,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - BlockDriverState *parent_bs, *new_bs = NULL; - BdrvChild *p_child; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - - parent_bs = bdrv_lookup_bs(parent, parent, errp); - if (!parent_bs) { -@@ -3693,7 +3693,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - } - - out: -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - BlockJobInfoList *qmp_query_block_jobs(Error **errp) -diff --git a/blockjob.c b/blockjob.c -index b7a29052b9..7310412313 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -199,7 +199,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - * to process an already freed BdrvChild. - */ - aio_context_release(job->job.aio_context); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - aio_context_acquire(job->job.aio_context); - while (job->nodes) { - GSList *l = job->nodes; -@@ -212,7 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - - g_slist_free_1(l); - } -- bdrv_graph_wrunlock_ctx(job->job.aio_context); -+ bdrv_graph_wrunlock(); - } - - bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -@@ -514,7 +514,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - int ret; - GLOBAL_STATE_CODE(); - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - - if (job_id == NULL && !(flags & JOB_INTERNAL)) { - job_id = bdrv_get_device_name(bs); -@@ -523,7 +523,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs), - flags, cb, opaque, errp); - if (job == NULL) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - return NULL; - } - -@@ -563,11 +563,11 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - goto fail; - } - -- bdrv_graph_wrunlock(bs); 
-+ bdrv_graph_wrunlock(); - return job; - - fail: -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - job_early_fail(&job->job); - return NULL; - } -diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h -index 22b5db1ed9..d7545e82d0 100644 ---- a/include/block/graph-lock.h -+++ b/include/block/graph-lock.h -@@ -110,34 +110,17 @@ void unregister_aiocontext(AioContext *ctx); - * - * The wrlock can only be taken from the main loop, with BQL held, as only the - * main loop is allowed to modify the graph. -- * -- * If @bs is non-NULL, its AioContext is temporarily released. -- * -- * This function polls. Callers must not hold the lock of any AioContext other -- * than the current one and the one of @bs. - */ - void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA --bdrv_graph_wrlock(BlockDriverState *bs); -+bdrv_graph_wrlock(void); - - /* - * bdrv_graph_wrunlock: - * Write finished, reset global has_writer to 0 and restart - * all readers that are waiting. -- * -- * If @bs is non-NULL, its AioContext is temporarily released. -- */ --void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA --bdrv_graph_wrunlock(BlockDriverState *bs); -- --/* -- * bdrv_graph_wrunlock_ctx: -- * Write finished, reset global has_writer to 0 and restart -- * all readers that are waiting. -- * -- * If @ctx is non-NULL, its lock is temporarily released. 
- */ - void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA --bdrv_graph_wrunlock_ctx(AioContext *ctx); -+bdrv_graph_wrunlock(void); - - /* - * bdrv_graph_co_rdlock: -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index a38e5833fb..38364fa557 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -261,8 +261,8 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: - graph_lock=' bdrv_graph_rdlock_main_loop();' - graph_unlock=' bdrv_graph_rdunlock_main_loop();' - elif func.graph_wrlock: -- graph_lock=' bdrv_graph_wrlock(NULL);' -- graph_unlock=' bdrv_graph_wrunlock(NULL);' -+ graph_lock=' bdrv_graph_wrlock();' -+ graph_unlock=' bdrv_graph_wrunlock();' - - return f"""\ - /* -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 704d1a3f36..d9754dfebc 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -807,9 +807,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - tjob->bs = src; - job = &tjob->common; - -- bdrv_graph_wrlock(target); -+ bdrv_graph_wrlock(); - block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); -- bdrv_graph_wrunlock(target); -+ bdrv_graph_wrunlock(); - - switch (result) { - case TEST_JOB_SUCCESS: -@@ -991,11 +991,11 @@ static void bdrv_test_top_close(BlockDriverState *bs) - { - BdrvChild *c, *next_c; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { - bdrv_unref_child(bs, c); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int coroutine_fn GRAPH_RDLOCK -@@ -1085,10 +1085,10 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- 
bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* This child will be the one to pass to requests through to, and - * it will stall until a drain occurs */ -@@ -1096,21 +1096,21 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - &error_abort); - child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; - /* Takes our reference to child_bs */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", - &child_of_bds, - BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* This child is just there to be deleted - * (for detach_instead_of_delete == true) */ - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - blk_insert_bs(blk, bs, &error_abort); -@@ -1193,14 +1193,14 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) - - bdrv_dec_in_flight(data->child_b->bs); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(data->parent_b, data->child_b); - - bdrv_ref(data->c); - data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", - &child_of_bds, BDRV_CHILD_DATA, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) -@@ -1298,7 +1298,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - /* Set child relationships */ - bdrv_ref(b); - bdrv_ref(a); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - child_a = bdrv_attach_child(parent_b, a, "PB-A", 
&child_of_bds, -@@ -1308,7 +1308,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - bdrv_attach_child(parent_a, a, "PA-A", - by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_assert_cmpint(parent_a->refcnt, ==, 1); - g_assert_cmpint(parent_b->refcnt, ==, 1); -@@ -1727,7 +1727,7 @@ static void test_drop_intermediate_poll(void) - * Establish the chain last, so the chain links are the first - * elements in the BDS.parents lists - */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < 3; i++) { - if (i) { - /* Takes the reference to chain[i - 1] */ -@@ -1735,7 +1735,7 @@ static void test_drop_intermediate_poll(void) - &chain_child_class, BDRV_CHILD_COW, &error_abort); - } - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - job = block_job_create("job", &test_simple_job_driver, NULL, job_node, - 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); -@@ -1982,10 +1982,10 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - new_child_bs->total_sectors = 1; - - bdrv_ref(old_child_bs); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { -@@ -2016,9 +2016,9 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - g_assert(parent_bs->quiesce_counter == old_drain_count); - bdrv_drained_begin(old_child_bs); - bdrv_drained_begin(new_child_bs); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(new_child_bs); - bdrv_drained_end(old_child_bs); - g_assert(parent_bs->quiesce_counter == new_drain_count); -diff --git 
a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c -index 074adcbb93..8ee6ef38d8 100644 ---- a/tests/unit/test-bdrv-graph-mod.c -+++ b/tests/unit/test-bdrv-graph-mod.c -@@ -137,10 +137,10 @@ static void test_update_perm_tree(void) - - blk_insert_bs(root, bs, &error_abort); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(filter, bs, "child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - aio_context_acquire(qemu_get_aio_context()); - ret = bdrv_append(filter, bs, NULL); -@@ -206,11 +206,11 @@ static void test_should_update_child(void) - - bdrv_set_backing_hd(target, bs, &error_abort); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - g_assert(target->backing->bs == bs); - bdrv_attach_child(filter, target, "target", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - aio_context_acquire(qemu_get_aio_context()); - bdrv_append(filter, bs, &error_abort); - aio_context_release(qemu_get_aio_context()); -@@ -248,7 +248,7 @@ static void test_parallel_exclusive_write(void) - bdrv_ref(base); - bdrv_ref(fl1); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, fl1, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -@@ -260,7 +260,7 @@ static void test_parallel_exclusive_write(void) - &error_abort); - - bdrv_replace_node(fl1, fl2, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(fl2); - bdrv_drained_end(fl1); -@@ -367,7 +367,7 @@ static void test_parallel_perm_update(void) - */ - bdrv_ref(base); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, - &error_abort); - c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds, -@@ -380,7 +380,7 @@ static void test_parallel_perm_update(void) - bdrv_attach_child(fl2, base, "backing", 
&child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* Select fl1 as first child to be active */ - s->selected = c_fl1; -@@ -434,11 +434,11 @@ static void test_append_greedy_filter(void) - BlockDriverState *base = no_perm_node("base"); - BlockDriverState *fl = exclusive_writer_node("fl1"); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, base, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - aio_context_acquire(qemu_get_aio_context()); - bdrv_append(fl, base, &error_abort); --- -2.39.3 - diff --git a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch deleted file mode 100644 index 4fb4844..0000000 --- a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Wed, 17 Jan 2024 14:55:53 +0100 -Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB - alignment - -RH-Author: David Hildenbrand -RH-MergeRequest: 221: memory-device: reintroduce memory region size check -RH-Jira: RHEL-20341 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Igor Mammedov -RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825 - -https://issues.redhat.com/browse/RHEL-20341 - -Let's implement the get_min_alignment() callback for memory devices, and -copy for the device memory region the alignment of the host memory -region. This mimics what virtio-mem does, and allows for re-introducing -proper alignment checks for the memory region size (where we don't care -about additional device requirements) in memory device core. - -Message-ID: <20240117135554.787344-2-david@redhat.com> -Reviewed-by: Maciej S. 
Szmigiero -Signed-off-by: David Hildenbrand -(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414) -Signed-off-by: David Hildenbrand ---- - hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++---------------- - 1 file changed, 21 insertions(+), 16 deletions(-) - -diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c -index 66f297c1d7..0829c495b0 100644 ---- a/hw/hyperv/hv-balloon.c -+++ b/hw/hyperv/hv-balloon.c -@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) - balloon->mr = g_new0(MemoryRegion, 1); - memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, - memory_region_size(hostmem_mr)); -- -- /* -- * The VM can indicate an alignment up to 32 GiB. Memory device core can -- * usually only handle/guarantee 1 GiB alignment. The user will have to -- * specify a larger maxmem eventually. -- * -- * The memory device core will warn the user in case maxmem might have to be -- * increased and will fail plugging the device if there is not sufficient -- * space after alignment. -- * -- * TODO: we could do the alignment ourselves in a slightly bigger region. -- * But this feels better, although the warning might be annoying. Maybe -- * we can optimize that in the future (e.g., with such a device on the -- * cmdline place/size the device memory region differently. -- */ -- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); -+ balloon->mr->align = memory_region_get_alignment(hostmem_mr); - } - - static void hv_balloon_free_mr(HvBalloon *balloon) -@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, - return balloon->mr; - } - -+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md) -+{ -+ /* -+ * The VM can indicate an alignment up to 32 GiB. Memory device core can -+ * usually only handle/guarantee 1 GiB alignment. The user will have to -+ * specify a larger maxmem eventually. 
-+ * -+ * The memory device core will warn the user in case maxmem might have to be -+ * increased and will fail plugging the device if there is not sufficient -+ * space after alignment. -+ * -+ * TODO: we could do the alignment ourselves in a slightly bigger region. -+ * But this feels better, although the warning might be annoying. Maybe -+ * we can optimize that in the future (e.g., with such a device on the -+ * cmdline place/size the device memory region differently. -+ */ -+ return 32 * GiB; -+} -+ - static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, - MemoryDeviceInfo *info) - { -@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data) - mdc->get_memory_region = hv_balloon_md_get_memory_region; - mdc->decide_memslots = hv_balloon_decide_memslots; - mdc->get_memslots = hv_balloon_get_memslots; -+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment; - mdc->fill_device_info = hv_balloon_md_fill_device_info; - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch deleted file mode 100644 index 84f6108..0000000 --- a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:18 +0800 -Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen 
-Signed-off-by: Cédric Le Goater -(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e) -Signed-off-by: Eric Auger ---- - hw/arm/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index 3ada335a24..660f49db49 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -8,6 +8,7 @@ config ARM_VIRT - imply TPM_TIS_SYSBUS - imply TPM_TIS_I2C - imply NVDIMM -+ imply IOMMUFD - select ARM_GIC - select ACPI - select ARM_SMMUV3 --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch deleted file mode 100644 index 76ab341..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +++ /dev/null @@ -1,88 +0,0 @@ -From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 9 Jan 2024 11:36:42 +1000 -Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory - regions - -RH-Author: Gavin Shan -RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions -RH-Jira: RHEL-19738 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Auger -RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4 - -Upstream: RHEL-only -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352 - -There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI -ECAM and PCI MMIO. Each of them has a property introduced by upstream -commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory -regions") so that the corresponding high memory region can be disabled. - -It's notable that another property ("compact-highmem") introduced by -upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property") -so that the compact high memory region layout during assignment can be -disabled, compatible to the old machine types. 
However, we don't have -the compatible issue since the compact high memory region layout is -always kept as disabled until RHEL9.2.0 machine type and onwards. - -Expose those 3 properties: "highmem-redists", "highmem-ecam" and -"highmem-mmio". The property "compact-highmem" is kept as hidden. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 24 +++++++++++++++++++++++- - 1 file changed, 23 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 5cab00b4cd..60f117f0d2 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - - vms->highmem_compact = value; - } -+#endif /* disabled for RHEL */ - - static bool virt_get_highmem_redists(Object *obj, Error **errp) - { -@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) - - vms->highmem_mmio = value; - } --#endif /* disabled for RHEL */ - - static bool virt_get_its(Object *obj, Error **errp) - { -@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable using " - "physical address space above 32 bits"); - -+ object_class_property_add_bool(oc, "highmem-redists", -+ virt_get_highmem_redists, -+ virt_set_highmem_redists); -+ object_class_property_set_description(oc, "highmem-redists", -+ "Set on/off to enable/disable high " -+ "memory region for GICv3 or GICv4 " -+ "redistributor"); -+ -+ object_class_property_add_bool(oc, "highmem-ecam", -+ virt_get_highmem_ecam, -+ virt_set_highmem_ecam); -+ object_class_property_set_description(oc, "highmem-ecam", -+ "Set on/off to enable/disable high " -+ "memory region for PCI ECAM"); -+ -+ object_class_property_add_bool(oc, "highmem-mmio", -+ virt_get_highmem_mmio, -+ virt_set_highmem_mmio); -+ object_class_property_set_description(oc, "highmem-mmio", -+ "Set on/off to enable/disable high " -+ "memory region for PCI MMIO"); -+ - object_class_property_add_str(oc, 
"gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch new file mode 100644 index 0000000..29991d5 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch @@ -0,0 +1,120 @@ +From 41c4083269ec772b406c6c57b496ca2011f928c7 Mon Sep 17 00:00:00 2001 +From: Zhenyu Zhang +Date: Tue, 9 Jul 2024 23:08:59 -0400 +Subject: [PATCH 2/2] hw/arm/virt: Avoid unexpected warning from Linux guest on + host with Fujitsu CPUs + +RH-Author: zhenyzha +RH-MergeRequest: 256: hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs +RH-Jira: RHEL-39936 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] fdf156fd05b219a06e2e2ca409fff0f728c1e2cf (zhenyzha/qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-39936 + +Multiple warning messages and corresponding backtraces are observed when Linux +guest is booted on the host with Fujitsu CPUs. One of them is shown as below. 
+ +[ 0.032443] ------------[ cut here ]------------ +[ 0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than +CTR_EL0.CWG (128 < 256) +[ 0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 +arch_setup_dma_ops+0xbc/0xcc +[ 0.032470] Modules linked in: +[ 0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 +[ 0.032481] Hardware name: linux,dummy-virt (DT) +[ 0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 0.032490] pc : arch_setup_dma_ops+0xbc/0xcc +[ 0.032496] lr : arch_setup_dma_ops+0xbc/0xcc +[ 0.032501] sp : ffff80008003b860 +[ 0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c +[ 0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 +[ 0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000 +[ 0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff +[ 0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89 +[ 0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61 +[ 0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4 +[ 0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029 +[ 0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001 +[ 0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40 +[ 0.032564] Call trace: +[ 0.032566] arch_setup_dma_ops+0xbc/0xcc +[ 0.032572] of_dma_configure_id+0x138/0x300 +[ 0.032591] amba_dma_configure+0x34/0xc0 +[ 0.032600] really_probe+0x78/0x3dc +[ 0.032614] __driver_probe_device+0x108/0x160 +[ 0.032619] driver_probe_device+0x44/0x114 +[ 0.032624] __device_attach_driver+0xb8/0x14c +[ 0.032629] bus_for_each_drv+0x88/0xe4 +[ 0.032634] __device_attach+0xb0/0x1e0 +[ 0.032638] device_initial_probe+0x18/0x20 +[ 0.032643] bus_probe_device+0xa8/0xb0 +[ 0.032648] device_add+0x4b4/0x6c0 +[ 0.032652] amba_device_try_add.part.0+0x48/0x360 +[ 0.032657] 
amba_device_add+0x104/0x144 +[ 0.032662] of_amba_device_create.isra.0+0x100/0x1c4 +[ 0.032666] of_platform_bus_create+0x294/0x35c +[ 0.032669] of_platform_populate+0x5c/0x150 +[ 0.032672] of_platform_default_populate_init+0xd0/0xec +[ 0.032697] do_one_initcall+0x4c/0x2e0 +[ 0.032701] do_initcalls+0x100/0x13c +[ 0.032707] kernel_init_freeable+0x1c8/0x21c +[ 0.032712] kernel_init+0x28/0x140 +[ 0.032731] ret_from_fork+0x10/0x20 +[ 0.032735] ---[ end trace 0000000000000000 ]--- + +In Linux, a check is applied to every device which is exposed through +device-tree node. The warning message is raised when the device isn't +DMA coherent and the cache line size is larger than ARCH_DMA_MINALIGN +(128 bytes). The cache line is sorted from CTR_EL0[CWG], which corresponds +to 256 bytes on the guest CPUs. The DMA coherent capability is claimed +through 'dma-coherent' in their device-tree nodes or parent nodes. +This happens even when the device doesn't implement or use DMA at all, +for legacy reasons. + +Fix the issue by adding 'dma-coherent' property to the device-tree root +node, meaning all devices are capable of DMA coherent by default. +This both suppresses the spurious kernel warnings and also guards +against possible future QEMU bugs where we add a DMA-capable device +and forget to mark it as dma-coherent. 
+ +Signed-off-by: Zhenyu Zhang +Reviewed-by: Gavin Shan +Reviewed-by: Donald Dutile +Message-id: 20240612020506.307793-1-zhenyzha@redhat.com +[PMM: tweaked commit message] +Signed-off-by: Peter Maydell +(cherry picked from commit dda533087ad5559674ff486e7031c88dc01e0abd) +Signed-off-by: Zhenyu Zhang +--- + hw/arm/virt.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 3f0496cdb9..6ece67f11d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -330,6 +330,17 @@ static void create_fdt(VirtMachineState *vms) + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2); + qemu_fdt_setprop_string(fdt, "/", "model", "linux,dummy-virt"); + ++ /* ++ * For QEMU, all DMA is coherent. Advertising this in the root node ++ * has two benefits: ++ * ++ * - It avoids potential bugs where we forget to mark a DMA ++ * capable device as being dma-coherent ++ * - It avoids spurious warnings from the Linux kernel about ++ * devices which can't do DMA at all ++ */ ++ qemu_fdt_setprop(fdt, "/", "dma-coherent", NULL, 0); ++ + /* /chosen must exist for load_dtb to fill in necessary properties later */ + qemu_fdt_add_subnode(fdt, "/chosen"); + if (vms->dtb_randomness) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch deleted file mode 100644 index 7e3af18..0000000 --- a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 3 Jan 2024 05:57:38 -0500 -Subject: [PATCH] hw/arm/virt: Fix compats - -RH-Author: Eric Auger -RH-MergeRequest: 209: hw/arm/virt: Fix compats -RH-Jira: RHEL-17168 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm) - -arm_rhel_compat is not added for virt-rhel9.4.0 machine causing -the efi-virtio.rom to be looked for when instantiating a virtio-net-pci 
-device and it won't be found since not shipped on ARM. This is a -regression compared to 9.2. - -Actually we do not need any rom file for any virtio-net-pci variant -because edk2 already brings the functionality. So for 9.4 onwards, we -want to set romfiles to "" for all of them. - -However at the moment we apply arm_rhel_compat from the latest -rhel*_virt_options(). This is not aligned with the generic compat -usage which sets compats for a given machine type to accomodate for -changes that occured after its advent. Here we are somehow abusing -the compat infra to set general driver options that should apply for -all machines. On top of that this is really error prone and we have -forgotten to add arm_rhel_compat several times in the past. - -So let's introduce set_arm_rhel_compat() being called before any -*virt_options in the non abstract machine class. That way the setting -will apply to any machine type without any need to add it in any -future machine types. - -For < 9.4 machines we don't really care keeping non void romfiles -for transitional and non transitional devices because anyway this was -not working. So let's keep things simple and apply the new defaults for -all RHEL9 machine types. - -Finally, to follow the generic pattern we should set hw_compat_rhel_9_0 -in 9.0 machine as it is done on x86 or ccw. This has no consequence on -aarch64 because it only contains x86 stuff but that helps understanding -the consistency. 
- -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 43 +++++++++++++++++++++++++++++-------------- - 1 file changed, 29 insertions(+), 14 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0b17c94ad7..5cab00b4cd 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -111,11 +111,39 @@ - DEFINE_VIRT_MACHINE_LATEST(major, minor, false) - #endif /* disabled for RHEL */ - -+/* -+ * This variable is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. They may be overriden by older machine compats. -+ * -+ * virtio-net-pci variant romfiles are not needed because edk2 does -+ * fully support the pxe boot. Besides virtio romfiles are not shipped -+ * on rhel/aarch64. -+ */ -+GlobalProperty arm_rhel_compat[] = { -+ {"virtio-net-pci", "romfile", "" }, -+ {"virtio-net-pci-transitional", "romfile", "" }, -+ {"virtio-net-pci-non-transitional", "romfile", "" }, -+}; -+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); -+ -+/* -+ * This cannot be called from the rhel_virt_class_init() because -+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() -+ * only is called on virt-rhelm.n.s non abstract class init. -+ */ -+static void arm_rhel_compat_set(MachineClass *mc) -+{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, -+ arm_rhel_compat_len); -+} -+ - #define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ - static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ - void *data) \ - { \ - MachineClass *mc = MACHINE_CLASS(oc); \ -+ arm_rhel_compat_set(mc); \ - rhel##m##n##s##_virt_options(mc); \ - mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ - if (latest) { \ -@@ -139,19 +167,6 @@ - #define DEFINE_RHEL_MACHINE(major, minor, subminor) \ - DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) - --/* This variable is for changes to properties that are RHEL specific, -- * different to the current upstream and to be applied to the latest -- * machine type. -- */ --GlobalProperty arm_rhel_compat[] = { -- { -- .driver = "virtio-net-pci", -- .property = "romfile", -- .value = "", -- }, --}; --const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); -- - /* Number of external interrupt lines to configure the GIC with */ - #define NUM_IRQS 256 - -@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc) - { - rhel940_virt_options(mc); - -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); -@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc) - rhel920_virt_options(mc); - - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch new file mode 100644 index 0000000..8128a4e --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch @@ -0,0 +1,59 @@ +From e3360c415f7de923d27c3167260a93cb679afabe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 6 May 2024 15:09:43 +0200 +Subject: [PATCH 1/2] hw/arm/virt: Fix spurious call to arm_virt_compat_set() + 
+RH-Author: Eric Auger +RH-MergeRequest: 238: hw/arm/virt: Fix spurious call to arm_virt_compat_set() +RH-Jira: RHEL-34945 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Gavin Shan +RH-Commit: [1/1] a858a3e1dff12b28e14f7e4bd2b896a9f06eacbb (eauger1/centos-qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-34945 +Status: RHEL-only + +Downstream, we apply arm_rhel_compat in place of arm_virt_compat. +This is done though arm_rhel_compat_set() transparently called in +DEFINE_RHEL_MACHINE_LATEST(). So there is no need to call +arm_virt_compat_set() in rhel_machine_class_init(). Besides +this triggers a "GLib: g_ptr_array_add: assertion 'rarray' failed" +warning. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f1af9495c6..3f0496cdb9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -85,6 +85,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static GlobalProperty arm_virt_compat[] = { + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; +@@ -101,7 +102,6 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -3536,7 +3536,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch deleted file mode 100644 index 4770a58..0000000 --- 
a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 12 Feb 2024 10:37:54 +0100 -Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types - -RH-Author: Cornelia Huck -RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types -RH-Jira: RHEL-24988 -RH-Acked-by: Sebastian Ott -RH-Acked-by: Eric Auger -RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-24988 -Upstream: RHEL only - -We do not plan to support any machine types prior to 9.4.0; leave them -in, but mark as deprecated. - -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 60f117f0d2..943c563391 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc) - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); -+ -+ /* RHEL 9.4 is the first supported release */ -+ mc->deprecation_reason = -+ "machine types for versions prior to 9.4 are deprecated"; - } - DEFINE_RHEL_MACHINE(9, 2, 0) - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch deleted file mode 100644 index 81c20e5..0000000 --- a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:20 +0800 -Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8) -Signed-off-by: Eric Auger ---- - hw/i386/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig -index 55850791df..a1846be6f7 100644 ---- a/hw/i386/Kconfig -+++ b/hw/i386/Kconfig -@@ -95,6 +95,7 @@ config Q35 - imply E1000E_PCI_EXPRESS - imply VMPORT - imply VMMOUSE -+ imply IOMMUFD - select PC_PCI - select PC_ACPI - select PCI_EXPRESS_Q35 --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch new file mode 100644 index 0000000..ee2f88e --- /dev/null +++ b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch @@ -0,0 +1,73 @@ +From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:40 -0500 +Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using + guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm) + +When guest_memfd is enabled, the BIOS is generally part of the initial +encrypted guest image and will be accessed as private guest memory. Add +the necessary changes to set up the associated RAM region with a +guest_memfd backend to allow for this. 
+ +Current support centers around using -bios to load the BIOS data. +Support for loading the BIOS via pflash requires additional enablement +since those interfaces rely on the use of ROM memory regions which make +use of the KVM_MEM_READONLY memslot flag, which is not supported for +guest_memfd-backed memslots. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86-common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 35fe6eabea..6cbb76c25c 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(x86ms))) { ++ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + } + g_free(filename); + +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); ++ if (!machine_require_guest_memfd(MACHINE(x86ms))) { ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git 
a/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch new file mode 100644 index 0000000..1fafe03 --- /dev/null +++ b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch @@ -0,0 +1,106 @@ +From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:39 +0200 +Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take + X86MachineState rather than MachineState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm) + +The function creates and leaks two MemoryRegion objects regarding the BIOS which +will be moved into X86MachineState in the next steps to avoid the leakage. + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-3-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd) +Signed-off-by: Paolo Bonzini +--- + hw/i386/microvm.c | 2 +- + hw/i386/pc_sysfw.c | 4 ++-- + hw/i386/x86.c | 4 ++-- + include/hw/i386/x86.h | 2 +- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c +index 61a772dfe6..fec63cacfa 100644 +--- a/hw/i386/microvm.c ++++ b/hw/i386/microvm.c +@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) + default_firmware = x86_machine_is_acpi_enabled(x86ms) + ? 
MICROVM_BIOS_FILENAME + : MICROVM_QBOOT_FILENAME; +- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); ++ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); + } + + static void microvm_memory_init(MicrovmMachineState *mms) +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 3efabbbab2..ef7dea9798 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; + + if (!pcmc->pci_enabled) { +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + return; + } + +@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + + if (!pflash_blk[0]) { + /* Machine property pflash0 not set, use ROM mode */ +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } else { + if (kvm_enabled() && !kvm_readonly_mem_enabled()) { + /* +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 2a4f3ee285..6d3c72f124 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; +@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + ssize_t ret; + + /* BIOS load */ +- bios_name = ms->firmware ?: default_firmware; ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 4dc30dcb4d..cb07618d19 100644 +--- 
a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + + void x86_load_linux(X86MachineState *x86ms, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch new file mode 100644 index 0000000..a789fb7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch @@ -0,0 +1,51 @@ +From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 3 Apr 2024 10:59:53 -0400 +Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is + not disabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm) + +A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system +also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC +is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to +be cleared. The PIC probe should then print: + + [ 0.155970] Using NULL legacy PIC + +However, no such log printed in guest kernel unless PCAT_COMPAT is +cleared. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844) +Signed-off-by: Paolo Bonzini +--- + hw/i386/acpi-common.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c +index 20f19269da..0cc2919bb8 100644 +--- a/hw/i386/acpi-common.c ++++ b/hw/i386/acpi-common.c +@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); +- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ ++ /* Flags. bit 0: PCAT_COMPAT */ ++ build_append_int_noprefix(table_data, ++ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); + + for (i = 0; i < apic_ids->len; i++) { + pc_madt_cpu_entry(i, apic_ids, table_data, false); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch b/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch deleted file mode 100644 index 5470bdf..0000000 --- a/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch +++ /dev/null @@ -1,186 +0,0 @@ -From ea2e2368dcf4140be47288472f2c2a094358e0c7 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Thu, 8 Feb 2024 23:03:45 +0100 -Subject: [PATCH 03/20] hw/i386/pc: Defer smbios_set_defaults() to machine_done -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [1/18] 9d4c1d1a910fec7d310429d6fc0b10c798932db7 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -commit: a0204a5ed091dfe79aced7ec8f3ce1931fd25816 -Author: Bernhard Beschow - - Handling 
most of smbios data generation in the machine_done notifier is similar - to how the ARM virt machine handles it which also calls smbios_set_defaults() - there. The result is that all pc machines are freed from explicitly worrying - about smbios setup. - - Signed-off-by: Bernhard Beschow - Reviewed-by: Philippe Mathieu-Daudé - Message-ID: <20240208220349.4948-6-shentey@gmail.com> - Signed-off-by: Philippe Mathieu-Daudé - -Conflicts: hw/i386/pc_q35.c, hw/i386/pc_piix.c - due to missing 4d3457fef9 (w/i386/pc: Merge pc_guest_info_init() into pc_machine_initfn()) - and different signature of smbios_set_defaults() downstream -Fixup: hw/i386/fw_cfg.c to account for downstream changes smbios_set_defaults() - -Signed-off-by: Igor Mammedov ---- - hw/i386/fw_cfg.c | 14 +++++++++++++- - hw/i386/fw_cfg.h | 3 ++- - hw/i386/pc.c | 2 +- - hw/i386/pc_piix.c | 12 ------------ - hw/i386/pc_q35.c | 11 ----------- - include/hw/i386/pc.h | 1 - - 6 files changed, 16 insertions(+), 27 deletions(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index 7362daa45a..6a5466faf0 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -48,15 +48,27 @@ const char *fw_cfg_arch_key_name(uint16_t key) - return NULL; - } - --void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) -+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - { - #ifdef CONFIG_SMBIOS - uint8_t *smbios_tables, *smbios_anchor; - size_t smbios_tables_len, smbios_anchor_len; - struct smbios_phys_mem_area *mem_array; - unsigned i, array_count; -+ MachineState *ms = MACHINE(pcms); -+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -+ MachineClass *mc = MACHINE_GET_CLASS(pcms); - X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); - -+ if (pcmc->smbios_defaults) { -+ /* These values are guest ABI, do not change */ -+ smbios_set_defaults("QEMU", mc->desc, mc->name, -+ pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, -+ 
pcms->smbios_entry_point_type); -+ } -+ - /* tell smbios about cpuid version and features */ - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - -diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h -index 86ca7c1c0c..1e1de6b4a3 100644 ---- a/hw/i386/fw_cfg.h -+++ b/hw/i386/fw_cfg.h -@@ -10,6 +10,7 @@ - #define HW_I386_FW_CFG_H - - #include "hw/boards.h" -+#include "hw/i386/pc.h" - #include "hw/nvram/fw_cfg.h" - - #define FW_CFG_IO_BASE 0x510 -@@ -22,7 +23,7 @@ - FWCfgState *fw_cfg_arch_create(MachineState *ms, - uint16_t boot_cpus, - uint16_t apic_id_limit); --void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); -+void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); - void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); - void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index a1faa9e92c..16de2a59e8 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) - - acpi_setup(); - if (x86ms->fw_cfg) { -- fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg); -+ fw_cfg_build_smbios(pcms, x86ms->fw_cfg); - fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); - /* update FW_CFG_NB_CPUS to account for -device added CPUs */ - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 09d02cc91f..7344b35cf1 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -36,7 +36,6 @@ - #include "hw/rtc/mc146818rtc.h" - #include "hw/southbridge/piix.h" - #include "hw/display/ramfb.h" --#include "hw/firmware/smbios.h" - #include "hw/pci/pci.h" - #include "hw/pci/pci_ids.h" - #include "hw/usb.h" -@@ -233,17 +232,6 @@ static void pc_init1(MachineState *machine, - - pc_guest_info_init(pcms); - -- if (pcmc->smbios_defaults) { -- MachineClass *mc = MACHINE_GET_CLASS(machine); -- /* These values are guest ABI, do not change */ -- 
smbios_set_defaults("Red Hat", "KVM", -- mc->desc, pcmc->smbios_legacy_mode, -- pcmc->smbios_uuid_encoded, -- pcmc->smbios_stream_product, -- pcmc->smbios_stream_version, -- pcms->smbios_entry_point_type); -- } -- - /* allocate ram and load rom/bios */ - if (!xen_enabled()) { - pc_memory_init(pcms, system_memory, rom_memory, hole64_size); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index c6967e1846..9a22ff5dd6 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -45,7 +45,6 @@ - #include "hw/i386/amd_iommu.h" - #include "hw/i386/intel_iommu.h" - #include "hw/display/ramfb.h" --#include "hw/firmware/smbios.h" - #include "hw/ide/pci.h" - #include "hw/ide/ahci.h" - #include "hw/intc/ioapic.h" -@@ -201,16 +200,6 @@ static void pc_q35_init(MachineState *machine) - - pc_guest_info_init(pcms); - -- if (pcmc->smbios_defaults) { -- /* These values are guest ABI, do not change */ -- smbios_set_defaults("Red Hat", "KVM", -- mc->desc, pcmc->smbios_legacy_mode, -- pcmc->smbios_uuid_encoded, -- pcmc->smbios_stream_product, -- pcmc->smbios_stream_version, -- pcms->smbios_entry_point_type); -- } -- - /* create pci host bus */ - phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 37644ede7e..c286c10bc3 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -12,7 +12,6 @@ - #include "hw/hotplug.h" - #include "qom/object.h" - #include "hw/i386/sgx-epc.h" --#include "hw/firmware/smbios.h" - #include "hw/cxl/cxl.h" - - #define HPET_INTCAP "hpet-intcap" --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch new file mode 100644 index 0000000..021db3d --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch @@ -0,0 +1,164 @@ +From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:07 +0200 
+Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios + region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm) + +In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped +to the top of the 4G memory boundary. Do the same in the -pflash case, but only +for new machine versions for migration compatibility. This establishes common +behavior and makes pflash commands work in the "isa-bios" region which some +real-world legacy bioses rely on. + +Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash +case will now also point to encrypted memory, just like it already does in the +-bios case. + +When running `info mtree` before and after this commit with +`qemu-system-x86_64 -S -drive \ +if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running +`diff -u before.mtree after.mtree` results in the following changes in the +memory tree: + +| --- before.mtree +| +++ after.mtree +| @@ -71,7 +71,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -108,7 +108,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff 
(prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -131,11 +131,14 @@ +| memory-region: pc.ram +| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +| +| +memory-region: system.flash0 +| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 +| + +| memory-region: pci +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| +| memory-region: smram +| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff + +Note that in both cases the "system" memory region contains the entry + + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + +but the "system.flash0" memory region only appears standalone when "isa-bios" is +an alias. 
+ +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-7-shentey@gmail.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + hw/i386/pc_sysfw.c | 8 +++++++- + include/hw/i386/pc.h | 1 + + 5 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 1a34bc4522..660a59c63b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->has_reserved_memory = true; + pcmc->enforce_aligned_dimm = true; + pcmc->enforce_amd_1tb_hole = true; ++ pcmc->isa_bios_alias = true; + /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported + * to be used at the moment, 32K should be enough for a while. */ + pcmc->acpi_data_size = 0x20000 + 0x8000; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bef3e8b73e..dbb7f2ed17 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ pcmc->isa_bios_alias = false; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index dedc86eec9..f9900ad798 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ pcmc->isa_bios_alias = false; + + compat_props_add(m->compat_props, pc_rhel_9_5_compat, + pc_rhel_9_5_compat_len); +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 82d37cb376..ac88ad4eb9 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -135,6 +135,7 @@ static void 
pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { + X86MachineState *x86ms = X86_MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ if (pcmc->isa_bios_alias) { ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, ++ true); ++ } else { ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ } + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 467e7fb52f..3f53ec73ac 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -122,6 +122,7 @@ struct PCMachineClass { + bool enforce_aligned_dimm; + bool broken_reserved_end; + bool enforce_amd_1tb_hole; ++ bool isa_bios_alias; + + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch new file mode 100644 index 0000000..4188fd3 --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch @@ -0,0 +1,53 @@ +From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Mon, 22 Apr 2024 22:06:22 +0200 +Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from + pc_isa_bios_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm) + +Signed-off-by: Bernhard Beschow 
+Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240422200625.2768-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ef7dea9798..59c7a81692 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -41,8 +41,7 @@ + #define FLASH_SECTOR_SIZE 4096 + + static void pc_isa_bios_init(MemoryRegion *rom_memory, +- MemoryRegion *flash_mem, +- int ram_size) ++ MemoryRegion *flash_mem) + { + int isa_bios_size; + MemoryRegion *isa_bios; +@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem, size); ++ pc_isa_bios_init(rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch new file mode 100644 index 0000000..a543c79 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch @@ -0,0 +1,158 @@ +From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:30 -0500 +Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from + OVMF header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm) + +A recent version of OVMF expanded the reset vector GUID list to add +SEV-specific metadata GUID. 
The SEV metadata describes the reserved +memory regions such as the secrets and CPUID page used during the SEV-SNP +guest launch. + +The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV +metadata pointer from the OVMF GUID list. + +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 4 ++++ + include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ + target/i386/sev-sysemu-stub.c | 4 ++++ + target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 ++ + 5 files changed, 68 insertions(+) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ac88ad4eb9..9b8671c441 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size) + pc_system_parse_ovmf_flash(ptr, size); + + if (sev_enabled()) { ++ ++ /* Copy the SEV metadata table (if it exists) */ ++ pc_system_parse_sev_metadata(ptr, size); ++ + ret = sev_es_save_reset_vector(ptr, size); + if (ret) { + error_report("failed to locate and/or save reset vector"); +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 3f53ec73ac..94b49310f5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" + #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + ++typedef enum { ++ SEV_DESC_TYPE_UNDEF, ++ /* The section contains the region that must be validated by the VMM. 
*/ ++ SEV_DESC_TYPE_SNP_SEC_MEM, ++ /* The section contains the SNP secrets page */ ++ SEV_DESC_TYPE_SNP_SECRETS, ++ /* The section contains address that can be used as a CPUID page */ ++ SEV_DESC_TYPE_CPUID, ++ ++} ovmf_sev_metadata_desc_type; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { ++ uint32_t base; ++ uint32_t len; ++ ovmf_sev_metadata_desc_type type; ++} OvmfSevMetadataDesc; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadata { ++ uint8_t signature[4]; ++ uint32_t len; ++ uint32_t version; ++ uint32_t num_desc; ++ OvmfSevMetadataDesc descs[]; ++} OvmfSevMetadata; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, + MemoryRegion *pci_address_space); +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index 96e1c15cc3..fc1c57c411 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + monitor_printf(mon, "SEV is not available in this QEMU\n"); + } ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e84e4395a5..17281bb2c7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) + return sev_get_capabilities(errp); + } + ++static OvmfSevMetadata *ovmf_sev_metadata_table; ++ ++#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" ++typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { ++ uint32_t offset; ++} OvmfSevMetadataOffset; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) ++{ ++ return ovmf_sev_metadata_table; ++} ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++ OvmfSevMetadata *metadata; ++ OvmfSevMetadataOffset *data; ++ ++ if 
(!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, ++ NULL)) { ++ return; ++ } ++ ++ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); ++ if (memcmp(metadata->signature, "ASEV", 4) != 0 || ++ metadata->len < sizeof(OvmfSevMetadata) || ++ metadata->len > flash_size - data->offset) { ++ return; ++ } ++ ++ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); ++} ++ + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + Error **errp) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 5dc4767b1e..cc12824dd6 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch new file mode 100644 index 0000000..c5a7a28 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch @@ -0,0 +1,165 @@ +From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:36 -0500 +Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP + is enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm) + +As with SEV, an SNP guest requires that the BIOS be part of the initial +encrypted/measured guest payload. Extend sev_encrypt_flash() to handle +the SNP case and plumb through the GPA of the BIOS location since this +is needed for SNP. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 12 +++++++----- + hw/i386/x86-common.c | 2 +- + include/hw/i386/x86.h | 2 +- + target/i386/sev-sysemu-stub.c | 2 +- + target/i386/sev.c | 5 +++-- + target/i386/sev.h | 2 +- + 6 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 9b8671c441..7cdbafc8d2 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms, + assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); + + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { ++ hwaddr gpa; ++ + system_flash = pcms->flash[i]; + blk = pflash_cfi01_get_blk(system_flash); + if (!blk) { +@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms, + } + + total_size += size; ++ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ + qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", + size / FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); +- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, +- 0x100000000ULL - total_size); ++ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + if (sev_enabled()) { + flash_ptr = memory_region_get_ram_ptr(flash_mem); + flash_size = memory_region_size(flash_mem); +- x86_firmware_configure(flash_ptr, flash_size); ++ x86_firmware_configure(gpa, flash_ptr, flash_size); + } + } + } +@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + pc_system_flash_cleanup_unused(pcms); + } + +-void x86_firmware_configure(void *ptr, int size) 
++void x86_firmware_configure(hwaddr gpa, void *ptr, int size) + { + int ret; + +@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size) + exit(1); + } + +- sev_encrypt_flash(ptr, size, &error_fatal); ++ sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } + } +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 67b03c913a..35fe6eabea 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); ++ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b006f16b8d..d43cb3908e 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); + DeviceState *ioapic_init_secondary(GSIState *gsi_state); + + /* pc_sysfw.c */ +-void x86_firmware_configure(void *ptr, int size); ++void x86_firmware_configure(hwaddr gpa, void *ptr, int size); + + #endif +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index fc1c57c411..d5bf886e79 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + error_setg(errp, "SEV is not available in this QEMU"); + } + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + g_assert_not_reached(); + } +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 06401f0526..7b5c4b4874 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1484,7 
+1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + int +-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + +@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { + ret = false; + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index cc12824dd6..858005a119 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void); + uint32_t sev_get_reduced_phys_bits(void); + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch new file mode 100644 index 0000000..050a522 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch @@ -0,0 +1,123 @@ +From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:39 -0500 +Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: 
[82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm) + +Current SNP guest kernels will attempt to access these regions with +with C-bit set, so guest_memfd is needed to handle that. Otherwise, +kvm_convert_memory() will fail when the guest kernel tries to access it +and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges +to private. + +Whether guests should actually try to access ROM regions in this way (or +need to deal with legacy ROM regions at all), is a separate issue to be +addressed on kernel side, but current SNP guest kernels will exhibit +this behavior and so this handling is needed to allow QEMU to continue +running existing SNP guest kernels. + +Signed-off-by: Michael Roth +[pankaj: Added sev_snp_enabled() check] +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 14 ++++++++++---- + hw/i386/pc_sysfw.c | 19 +++++++++++++------ + 2 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0aca0cc79e..b25d075b59 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -62,6 +62,7 @@ + #include "hw/mem/memory-device.h" + #include "e820_memory_layout.h" + #include "trace.h" ++#include "sev.h" + #include CONFIG_DEVICES + + #ifdef CONFIG_XEN_EMU +@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms, + pc_system_firmware_init(pcms, rom_memory); + + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); +- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, +- &error_fatal); +- if (pcmc->pci_enabled) { +- memory_region_set_readonly(option_rom_mr, true); ++ if (machine_require_guest_memfd(machine)) { ++ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", ++ PC_ROM_SIZE, &error_fatal); ++ } else { ++ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, ++ 
&error_fatal); ++ if (pcmc->pci_enabled) { ++ memory_region_set_readonly(option_rom_mr, true); ++ } + } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 7cdbafc8d2..ef80281d28 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,8 +40,8 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, +- MemoryRegion *flash_mem) ++static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, ++ MemoryRegion *rom_memory, MemoryRegion *flash_mem) + { + int isa_bios_size; + uint64_t flash_size; +@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(pcms))) { ++ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", ++ isa_bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, ++ &error_fatal); ++ } + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, +@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), + isa_bios_size); + +- memory_region_set_readonly(isa_bios, true); ++ if (!machine_require_guest_memfd(current_machine)) { ++ memory_region_set_readonly(isa_bios, true); ++ } + } + + static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, +@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); + } + + /* Encrypt the 
pflash boot ROM */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch new file mode 100644 index 0000000..7b03cb4 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch @@ -0,0 +1,58 @@ +From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:43 -0500 +Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older + machine types + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm) + +Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for +creating SEV/SEV-ES going forward. However, this API results in guest +measurement changes which are generally not expected for users of these +older guest types and can cause disruption if they switch to a newer +QEMU/kernel version. Avoid this by continuing to use the older +KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. 
+ +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-4-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + target/i386/sev.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b9fde3cec1..1a34bc4522 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, ++ { "sev-guest", "legacy-vm-type", "true" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f4ee317cb0..d30b68c11e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); ++ object_apply_compat_props(obj); + } + + /* sev guest info */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch new file mode 100644 index 0000000..40ca52b --- /dev/null +++ b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch @@ -0,0 +1,2301 @@ +From bf2206fae2e640da9de7fc0648b4b90ad3ddfbe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:41 +0200 +Subject: [PATCH 046/100] hw/i386: split x86.c in multiple parts + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [46/91] 3d6e8364aa9b691c25bdcf54a30b116da5d33874 (bonzini/rhel-qemu-kvm) + +Keep the basic X86MachineState definition in x86.c. 
Move out functions that +are only needed by other files: x86-common.c for the pc and microvm machines, +x86-cpu.c for those used by accelerator code. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-11-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b061f0598b9231f7992aff4fcdf3f336f9747d11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/meson.build | 4 +- + hw/i386/x86-common.c | 1007 +++++++++++++++++++++++++++++++++++++++ + hw/i386/x86-cpu.c | 97 ++++ + hw/i386/x86.c | 1052 +---------------------------------------- + include/hw/i386/x86.h | 6 +- + 5 files changed, 1113 insertions(+), 1053 deletions(-) + create mode 100644 hw/i386/x86-common.c + create mode 100644 hw/i386/x86-cpu.c + +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d9da676038..3437da0aad 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -4,6 +4,7 @@ i386_ss.add(files( + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', ++ 'x86-cpu.c', + )) + + i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) +@@ -12,7 +13,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), + if_false: files('amd_iommu-stub.c')) + i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) +-i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) ++i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) + i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) + i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) + i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) +@@ -22,6 +23,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + + i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) + i386_ss.add(when: 'CONFIG_PC', if_true: files( ++ 'x86-common.c', + 'pc.c', 
+ 'pc_sysfw.c', + 'acpi-build.c', +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +new file mode 100644 +index 0000000000..67b03c913a +--- /dev/null ++++ b/hw/i386/x86-common.c +@@ -0,0 +1,1007 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "qemu/error-report.h" ++#include "qemu/cutils.h" ++#include "qemu/units.h" ++#include "qemu/datadir.h" ++#include "qapi/error.h" ++#include "sysemu/numa.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/xen.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/rtc/mc146818rtc.h" ++#include "target/i386/sev.h" ++ ++#include "hw/acpi/cpu_hotplug.h" ++#include "hw/irq.h" ++#include "hw/loader.h" ++#include "multiboot.h" ++#include "elf.h" ++#include "standard-headers/asm-x86/bootparam.h" ++#include CONFIG_DEVICES ++#include "kvm/kvm_i386.h" ++ ++#ifdef CONFIG_XEN_EMU ++#include "hw/xen/xen.h" ++#include "hw/i386/kvm/xen_evtchn.h" ++#endif ++ ++/* Physical Address of PVH entry point read from kernel ELF NOTE */ ++static size_t pvh_start_addr; ++ ++static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) ++{ ++ Object *cpu = object_new(MACHINE(x86ms)->cpu_type); ++ ++ if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { ++ goto out; ++ } ++ qdev_realize(DEVICE(cpu), NULL, errp); ++ ++out: ++ object_unref(cpu); ++} ++ ++void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) ++{ ++ int i; ++ const CPUArchIdList *possible_cpus; ++ MachineState *ms = MACHINE(x86ms); ++ MachineClass *mc = MACHINE_GET_CLASS(x86ms); ++ ++ x86_cpu_set_default_version(default_cpu_version); ++ ++ /* ++ * Calculates the limit to CPU APIC ID values ++ * ++ * Limit for the APIC ID value, so that all ++ * CPU APIC IDs are < x86ms->apic_id_limit. ++ * ++ * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). ++ */ ++ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ++ ms->smp.max_cpus - 1) + 1; ++ ++ /* ++ * Can we support APIC ID 255 or higher? With KVM, that requires ++ * both in-kernel lapic and X2APIC userspace API. 
++ * ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && x86ms->apic_id_limit > 255 && ++ kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { ++ error_report("current -smp configuration requires kernel " ++ "irqchip and X2APIC API support."); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (kvm_enabled()) { ++ kvm_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ if (!kvm_irqchip_in_kernel()) { ++ apic_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ possible_cpus = mc->possible_cpu_arch_ids(ms); ++ for (i = 0; i < ms->smp.cpus; i++) { ++ x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); ++ } ++} ++ ++void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) ++{ ++ MC146818RtcState *rtc = MC146818_RTC(s); ++ ++ if (cpus_count > 0xff) { ++ /* ++ * If the number of CPUs can't be represented in 8 bits, the ++ * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just ++ * to make old BIOSes fail more predictably. ++ */ ++ mc146818rtc_set_cmos_data(rtc, 0x5f, 0); ++ } else { ++ mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); ++ } ++} ++ ++static int x86_apic_cmp(const void *a, const void *b) ++{ ++ CPUArchId *apic_a = (CPUArchId *)a; ++ CPUArchId *apic_b = (CPUArchId *)b; ++ ++ return apic_a->arch_id - apic_b->arch_id; ++} ++ ++/* ++ * returns pointer to CPUArchId descriptor that matches CPU's apic_id ++ * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no ++ * entry corresponding to CPU's apic_id returns NULL. 
++ */ ++static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) ++{ ++ CPUArchId apic_id, *found_cpu; ++ ++ apic_id.arch_id = id; ++ found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, ++ ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), ++ x86_apic_cmp); ++ if (found_cpu && idx) { ++ *idx = found_cpu - ms->possible_cpus->cpus; ++ } ++ return found_cpu; ++} ++ ++void x86_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (x86ms->acpi_dev) { ++ hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* increment the number of CPUs */ ++ x86ms->boot_cpus++; ++ if (x86ms->rtc) { ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ } ++ if (x86ms->fw_cfg) { ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = CPU(dev); ++out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx = -1; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (!x86ms->acpi_dev) { ++ error_setg(errp, "CPU hot unplug not supported without ACPI"); ++ return; ++ } ++ ++ x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ assert(idx != -1); ++ if (idx == 0) { ++ error_setg(errp, "Boot CPU is unpluggable"); ++ return; ++ } ++ ++ hotplug_handler_unplug_request(x86ms->acpi_dev, dev, ++ errp); ++} ++ ++void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ hotplug_handler_unplug(x86ms->acpi_dev, dev, 
&local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = NULL; ++ qdev_unrealize(dev); ++ ++ /* decrement the number of CPUs */ ++ x86ms->boot_cpus--; ++ /* Update the number of CPUs in CMOS */ ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx; ++ CPUState *cs; ++ CPUArchId *cpu_slot; ++ X86CPUTopoIDs topo_ids; ++ X86CPU *cpu = X86_CPU(dev); ++ CPUX86State *env = &cpu->env; ++ MachineState *ms = MACHINE(hotplug_dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; ++ X86CPUTopoInfo topo_info; ++ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if (x86ms->acpi_dev) { ++ Error *local_err = NULL; ++ ++ hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ ++ init_topo_info(&topo_info, x86ms); ++ ++ env->nr_dies = ms->smp.dies; ++ ++ /* ++ * If APIC ID is not set, ++ * set it based on socket/die/core/thread properties. ++ */ ++ if (cpu->apic_id == UNASSIGNED_APIC_ID) { ++ int max_socket = (ms->smp.max_cpus - 1) / ++ smp_threads / smp_cores / ms->smp.dies; ++ ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. 
++ */ ++ if (cpu->die_id < 0 && ms->smp.dies == 1) { ++ cpu->die_id = 0; ++ } ++ ++ if (cpu->socket_id < 0) { ++ error_setg(errp, "CPU socket-id is not set"); ++ return; ++ } else if (cpu->socket_id > max_socket) { ++ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", ++ cpu->socket_id, max_socket); ++ return; ++ } ++ if (cpu->die_id < 0) { ++ error_setg(errp, "CPU die-id is not set"); ++ return; ++ } else if (cpu->die_id > ms->smp.dies - 1) { ++ error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", ++ cpu->die_id, ms->smp.dies - 1); ++ return; ++ } ++ if (cpu->core_id < 0) { ++ error_setg(errp, "CPU core-id is not set"); ++ return; ++ } else if (cpu->core_id > (smp_cores - 1)) { ++ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", ++ cpu->core_id, smp_cores - 1); ++ return; ++ } ++ if (cpu->thread_id < 0) { ++ error_setg(errp, "CPU thread-id is not set"); ++ return; ++ } else if (cpu->thread_id > (smp_threads - 1)) { ++ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", ++ cpu->thread_id, smp_threads - 1); ++ return; ++ } ++ ++ topo_ids.pkg_id = cpu->socket_id; ++ topo_ids.die_id = cpu->die_id; ++ topo_ids.core_id = cpu->core_id; ++ topo_ids.smt_id = cpu->thread_id; ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); ++ } ++ ++ cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ if (!cpu_slot) { ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ error_setg(errp, ++ "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" ++ " APIC ID %" PRIu32 ", valid index range 0:%d", ++ topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, ++ cpu->apic_id, ms->possible_cpus->len - 1); ++ return; ++ } ++ ++ if (cpu_slot->cpu) { ++ error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", ++ idx, cpu->apic_id); ++ return; ++ } ++ ++ /* if 'address' properties socket-id/core-id/thread-id are not set, set them ++ * so that 
machine_query_hotpluggable_cpus would show correct values ++ */ ++ /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() ++ * once -smp refactoring is complete and there will be CPU private ++ * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { ++ error_setg(errp, "property socket-id: %u doesn't match set apic-id:" ++ " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, ++ topo_ids.pkg_id); ++ return; ++ } ++ cpu->socket_id = topo_ids.pkg_id; ++ ++ if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { ++ error_setg(errp, "property die-id: %u doesn't match set apic-id:" ++ " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); ++ return; ++ } ++ cpu->die_id = topo_ids.die_id; ++ ++ if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { ++ error_setg(errp, "property core-id: %u doesn't match set apic-id:" ++ " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, ++ topo_ids.core_id); ++ return; ++ } ++ cpu->core_id = topo_ids.core_id; ++ ++ if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { ++ error_setg(errp, "property thread-id: %u doesn't match set apic-id:" ++ " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, ++ topo_ids.smt_id); ++ return; ++ } ++ cpu->thread_id = topo_ids.smt_id; ++ ++ /* ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && ++ !kvm_hv_vpindex_settable()) { ++ error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); ++ return; ++ } ++ ++ cs = CPU(cpu); ++ cs->cpu_index = idx; ++ ++ numa_cpu_pre_plug(cpu_slot, dev, errp); ++} ++ ++static long get_file_size(FILE *f) ++{ ++ long where, size; ++ ++ /* XXX: on 
Unix systems, using fstat() probably makes more sense */ ++ ++ where = ftell(f); ++ fseek(f, 0, SEEK_END); ++ size = ftell(f); ++ fseek(f, where, SEEK_SET); ++ ++ return size; ++} ++ ++void gsi_handler(void *opaque, int n, int level) ++{ ++ GSIState *s = opaque; ++ ++ trace_x86_gsi_interrupt(n, level); ++ switch (n) { ++ case 0 ... ISA_NUM_IRQS - 1: ++ if (s->i8259_irq[n]) { ++ /* Under KVM, Kernel will forward to both PIC and IOAPIC */ ++ qemu_set_irq(s->i8259_irq[n], level); ++ } ++ /* fall through */ ++ case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: ++#ifdef CONFIG_XEN_EMU ++ /* ++ * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC ++ * routing actually works properly under Xen). And then to ++ * *either* the PIRQ handling or the I/OAPIC depending on ++ * whether the former wants it. ++ */ ++ if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { ++ break; ++ } ++#endif ++ qemu_set_irq(s->ioapic_irq[n], level); ++ break; ++ case IO_APIC_SECONDARY_IRQBASE ++ ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: ++ qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); ++ break; ++ } ++} ++ ++void ioapic_init_gsi(GSIState *gsi_state, Object *parent) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ assert(parent); ++ if (kvm_ioapic_in_kernel()) { ++ dev = qdev_new(TYPE_KVM_IOAPIC); ++ } else { ++ dev = qdev_new(TYPE_IOAPIC); ++ } ++ object_property_add_child(parent, "ioapic", OBJECT(dev)); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); ++ ++ for (i = 0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++} ++ ++DeviceState *ioapic_init_secondary(GSIState *gsi_state) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ dev = qdev_new(TYPE_IOAPIC); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); ++ ++ for (i = 
0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++ return dev; ++} ++ ++/* ++ * The entry point into the kernel for PVH boot is different from ++ * the native entry point. The PVH entry is defined by the x86/HVM ++ * direct boot ABI and is available in an ELFNOTE in the kernel binary. ++ * ++ * This function is passed to load_elf() when it is called from ++ * load_elfboot() which then additionally checks for an ELF Note of ++ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to ++ * parse the PVH entry address from the ELF Note. ++ * ++ * Due to trickery in elf_opts.h, load_elf() is actually available as ++ * load_elf32() or load_elf64() and this routine needs to be able ++ * to deal with being called as 32 or 64 bit. ++ * ++ * The address of the PVH entry point is saved to the 'pvh_start_addr' ++ * global variable. (although the entry point is 32-bit, the kernel ++ * binary can be either 32-bit or 64-bit). ++ */ ++static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) ++{ ++ size_t *elf_note_data_addr; ++ ++ /* Check if ELF Note header passed in is valid */ ++ if (arg1 == NULL) { ++ return 0; ++ } ++ ++ if (is64) { ++ struct elf64_note *nhdr64 = (struct elf64_note *)arg1; ++ uint64_t nhdr_size64 = sizeof(struct elf64_note); ++ uint64_t phdr_align = *(uint64_t *)arg2; ++ uint64_t nhdr_namesz = nhdr64->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr64) + nhdr_size64 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *elf_note_data_addr; ++ } else { ++ struct elf32_note *nhdr32 = (struct elf32_note *)arg1; ++ uint32_t nhdr_size32 = sizeof(struct elf32_note); ++ uint32_t phdr_align = *(uint32_t *)arg2; ++ uint32_t nhdr_namesz = nhdr32->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr32) + nhdr_size32 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *(uint32_t *)elf_note_data_addr; ++ } ++ ++ return pvh_start_addr; ++} ++ ++static bool 
load_elfboot(const char *kernel_filename, ++ int kernel_file_size, ++ uint8_t *header, ++ size_t pvh_xen_start_addr, ++ FWCfgState *fw_cfg) ++{ ++ uint32_t flags = 0; ++ uint32_t mh_load_addr = 0; ++ uint32_t elf_kernel_size = 0; ++ uint64_t elf_entry; ++ uint64_t elf_low, elf_high; ++ int kernel_size; ++ ++ if (ldl_p(header) != 0x464c457f) { ++ return false; /* no elfboot */ ++ } ++ ++ bool elf_is64 = header[EI_CLASS] == ELFCLASS64; ++ flags = elf_is64 ? ++ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; ++ ++ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ ++ error_report("elfboot unsupported flags = %x", flags); ++ exit(1); ++ } ++ ++ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; ++ kernel_size = load_elf(kernel_filename, read_pvh_start_addr, ++ NULL, &elf_note_type, &elf_entry, ++ &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, ++ 0, 0); ++ ++ if (kernel_size < 0) { ++ error_report("Error while loading elf kernel"); ++ exit(1); ++ } ++ mh_load_addr = elf_low; ++ elf_kernel_size = elf_high - elf_low; ++ ++ if (pvh_start_addr == 0) { ++ error_report("Error loading uncompressed kernel without PVH ELF Note"); ++ exit(1); ++ } ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); ++ ++ return true; ++} ++ ++void x86_load_linux(X86MachineState *x86ms, ++ FWCfgState *fw_cfg, ++ int acpi_data_size, ++ bool pvh_enabled) ++{ ++ bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; ++ uint16_t protocol; ++ int setup_size, kernel_size, cmdline_size; ++ int dtb_size, setup_data_offset; ++ uint32_t initrd_max; ++ uint8_t header[8192], *setup, *kernel; ++ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; ++ FILE *f; ++ char *vmode; ++ MachineState *machine = MACHINE(x86ms); ++ struct setup_data *setup_data; ++ const char *kernel_filename = machine->kernel_filename; ++ const 
char *initrd_filename = machine->initrd_filename; ++ const char *dtb_filename = machine->dtb; ++ const char *kernel_cmdline = machine->kernel_cmdline; ++ SevKernelLoaderContext sev_load_ctx = {}; ++ ++ /* Align to 16 bytes as a paranoia measure */ ++ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; ++ ++ /* load the kernel header */ ++ f = fopen(kernel_filename, "rb"); ++ if (!f) { ++ fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ kernel_size = get_file_size(f); ++ if (!kernel_size || ++ fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != ++ MIN(ARRAY_SIZE(header), kernel_size)) { ++ fprintf(stderr, "qemu: could not load kernel '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ /* kernel protocol version */ ++ if (ldl_p(header + 0x202) == 0x53726448) { ++ protocol = lduw_p(header + 0x206); ++ } else { ++ /* ++ * This could be a multiboot kernel. If it is, let's stop treating it ++ * like a Linux kernel. ++ * Note: some multiboot images could be in the ELF format (the same of ++ * PVH), so we try multiboot first since we check the multiboot magic ++ * header before to load it. ++ */ ++ if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, ++ kernel_cmdline, kernel_size, header)) { ++ return; ++ } ++ /* ++ * Check if the file is an uncompressed kernel file (ELF) and load it, ++ * saving the PVH entry point used by the x86/HVM direct boot ABI. ++ * If load_elfboot() is successful, populate the fw_cfg info. 
++ */ ++ if (pvh_enabled && ++ load_elfboot(kernel_filename, kernel_size, ++ header, pvh_start_addr, fw_cfg)) { ++ fclose(f); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, ++ strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, ++ header, sizeof(header)); ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." 
++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, ++ initrd_size); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "pvh.bin"; ++ nb_option_roms++; ++ ++ return; ++ } ++ protocol = 0; ++ } ++ ++ if (protocol < 0x200 || !(header[0x211] & 0x01)) { ++ /* Low kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x10000; ++ } else if (protocol < 0x202) { ++ /* High but ancient kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x100000; ++ } else { ++ /* High and recent kernel */ ++ real_addr = 0x10000; ++ cmdline_addr = 0x20000; ++ prot_addr = 0x100000; ++ } ++ ++ /* highest address for loading the initrd */ ++ if (protocol >= 0x20c && ++ lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { ++ /* ++ * Linux has supported initrd up to 4 GB for a very long time (2007, ++ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), ++ * though it only sets initrd_max to 2 GB to "work around bootloader ++ * bugs". Luckily, QEMU firmware(which does something like bootloader) ++ * has supported this. ++ * ++ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can ++ * be loaded into any address. ++ * ++ * In addition, initrd_max is uint32_t simply because QEMU doesn't ++ * support the 64-bit boot protocol (specifically the ext_ramdisk_image ++ * field). ++ * ++ * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
++ */ ++ initrd_max = UINT32_MAX; ++ } else if (protocol >= 0x203) { ++ initrd_max = ldl_p(header + 0x22c); ++ } else { ++ initrd_max = 0x37ffffff; ++ } ++ ++ if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ sev_load_ctx.cmdline_data = (char *)kernel_cmdline; ++ sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; ++ ++ if (protocol >= 0x202) { ++ stl_p(header + 0x228, cmdline_addr); ++ } else { ++ stw_p(header + 0x20, 0xA33F); ++ stw_p(header + 0x22, cmdline_addr - real_addr); ++ } ++ ++ /* handle vga= parameter */ ++ vmode = strstr(kernel_cmdline, "vga="); ++ if (vmode) { ++ unsigned int video_mode; ++ const char *end; ++ int ret; ++ /* skip "vga=" */ ++ vmode += 4; ++ if (!strncmp(vmode, "normal", 6)) { ++ video_mode = 0xffff; ++ } else if (!strncmp(vmode, "ext", 3)) { ++ video_mode = 0xfffe; ++ } else if (!strncmp(vmode, "ask", 3)) { ++ video_mode = 0xfffd; ++ } else { ++ ret = qemu_strtoui(vmode, &end, 0, &video_mode); ++ if (ret != 0 || (*end && *end != ' ')) { ++ fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); ++ exit(1); ++ } ++ } ++ stw_p(header + 0x1fa, video_mode); ++ } ++ ++ /* loader type */ ++ /* ++ * High nybble = B reserved for QEMU; low nybble is revision number. ++ * If this code is substantially changed, you may want to consider ++ * incrementing the revision. 
++ */ ++ if (protocol >= 0x200) { ++ header[0x210] = 0xB0; ++ } ++ /* heap */ ++ if (protocol >= 0x201) { ++ header[0x211] |= 0x80; /* CAN_USE_HEAP */ ++ stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); ++ } ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ if (protocol < 0x200) { ++ fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); ++ exit(1); ++ } ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." ++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); ++ sev_load_ctx.initrd_data = initrd_data; ++ sev_load_ctx.initrd_size = initrd_size; ++ ++ stl_p(header + 0x218, initrd_addr); ++ stl_p(header + 0x21c, initrd_size); ++ } ++ ++ /* load kernel and setup */ ++ setup_size = header[0x1f1]; ++ if (setup_size == 0) { ++ setup_size = 4; ++ } ++ setup_size = (setup_size + 1) * 512; ++ if (setup_size > kernel_size) { ++ fprintf(stderr, "qemu: invalid kernel header\n"); ++ exit(1); ++ } ++ kernel_size -= setup_size; ++ ++ setup = g_malloc(setup_size); ++ kernel = g_malloc(kernel_size); ++ fseek(f, 0, SEEK_SET); ++ if (fread(setup, 1, setup_size, f) != setup_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ fclose(f); ++ ++ /* append dtb to kernel */ ++ if (dtb_filename) { ++ if (protocol < 0x209) { ++ fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); ++ exit(1); ++ } ++ ++ dtb_size = get_image_size(dtb_filename); ++ if (dtb_size <= 0) { ++ fprintf(stderr, "qemu: error reading dtb %s: %s\n", ++ dtb_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); ++ kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; ++ kernel = g_realloc(kernel, kernel_size); ++ ++ stq_p(header + 0x250, prot_addr + setup_data_offset); ++ ++ setup_data = (struct setup_data *)(kernel + setup_data_offset); ++ setup_data->next = 0; ++ setup_data->type = cpu_to_le32(SETUP_DTB); ++ setup_data->len = cpu_to_le32(dtb_size); ++ ++ load_image_size(dtb_filename, setup_data->data, dtb_size); ++ } ++ ++ /* ++ * If we're starting an encrypted VM, it will be OVMF based, which uses the ++ * efi stub for booting and doesn't require any values to be placed in the ++ * kernel header. We therefore don't update the header so the hash of the ++ * kernel on the other side of the fw_cfg interface matches the hash of the ++ * file the user passed in. 
++ */ ++ if (!sev_enabled()) { ++ memcpy(setup, header, MIN(sizeof(header), setup_size)); ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); ++ sev_load_ctx.kernel_data = (char *)kernel; ++ sev_load_ctx.kernel_size = kernel_size; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); ++ sev_load_ctx.setup_data = (char *)setup; ++ sev_load_ctx.setup_size = setup_size; ++ ++ if (sev_enabled()) { ++ sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "linuxboot.bin"; ++ if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { ++ option_rom[nb_option_roms].name = "linuxboot_dma.bin"; ++ } ++ nb_option_roms++; ++} ++ ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, ++ MemoryRegion *rom_memory, bool isapc_ram_fw) ++{ ++ const char *bios_name; ++ char *filename; ++ int bios_size; ++ ssize_t ret; ++ ++ /* BIOS load */ ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; ++ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); ++ if (filename) { ++ bios_size = get_image_size(filename); ++ } else { ++ bios_size = -1; ++ } ++ if (bios_size <= 0 || ++ (bios_size % 65536) != 0) { ++ goto 
bios_error; ++ } ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); ++ if (sev_enabled()) { ++ /* ++ * The concept of a "reset" simply doesn't exist for ++ * confidential computing guests, we have to destroy and ++ * re-launch them instead. So there is no need to register ++ * the firmware as rom to properly re-initialize on reset. ++ * Just go for a straight file load instead. ++ */ ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); ++ load_image_size(filename, ptr, bios_size); ++ x86_firmware_configure(ptr, bios_size); ++ } else { ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ++ ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); ++ if (ret != 0) { ++ goto bios_error; ++ } ++ } ++ g_free(filename); ++ ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ ++ /* map all the bios at the top of memory */ ++ memory_region_add_subregion(rom_memory, ++ (uint32_t)(-bios_size), ++ &x86ms->bios); ++ return; ++ ++bios_error: ++ fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); ++ exit(1); ++} +diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c +new file mode 100644 +index 0000000000..ab2920522d +--- /dev/null ++++ b/hw/i386/x86-cpu.c +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "sysemu/whpx.h" ++#include "sysemu/cpu-timers.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/intc/i8259.h" ++#include "hw/irq.h" ++#include "sysemu/kvm.h" ++ ++/* TSC handling */ ++uint64_t cpu_get_tsc(CPUX86State *env) ++{ ++ return cpus_get_elapsed_ticks(); ++} ++ ++/* IRQ handling */ ++static void pic_irq_request(void *opaque, int irq, int level) ++{ ++ CPUState *cs = first_cpu; ++ X86CPU *cpu = X86_CPU(cs); ++ ++ trace_x86_pic_interrupt(irq, level); ++ if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && ++ !whpx_apic_in_platform()) { ++ CPU_FOREACH(cs) { ++ cpu = X86_CPU(cs); ++ if (apic_accept_pic_intr(cpu->apic_state)) { ++ apic_deliver_pic_intr(cpu->apic_state, level); ++ } ++ } ++ } else { ++ if (level) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } ++ } ++} ++ ++qemu_irq x86_allocate_cpu_irq(void) ++{ ++ return qemu_allocate_irq(pic_irq_request, NULL, 0); ++} ++ ++int cpu_get_pic_interrupt(CPUX86State *env) ++{ ++ X86CPU *cpu = env_archcpu(env); ++ int intno; ++ ++ if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { ++ intno = apic_get_interrupt(cpu->apic_state); ++ if (intno >= 0) { ++ return intno; ++ } ++ /* read the irq from the PIC */ ++ if (!apic_accept_pic_intr(cpu->apic_state)) { ++ return -1; ++ } ++ } ++ ++ intno = pic_read_irq(isa_pic); ++ return intno; ++} ++ ++DeviceState *cpu_get_current_apic(void) ++{ ++ if (current_cpu) { ++ X86CPU *cpu = X86_CPU(current_cpu); ++ return cpu->apic_state; ++ } else { ++ return NULL; ++ } ++} +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index fcef652c1e..0b5cc59956 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -22,52 +22,25 @@ + */ + #include "qemu/osdep.h" + #include "qemu/error-report.h" +-#include "qemu/option.h" +-#include "qemu/cutils.h" + #include "qemu/units.h" +-#include "qemu/datadir.h" + #include "qapi/error.h" 
+ #include "qapi/qapi-visit-common.h" +-#include "qapi/clone-visitor.h" + #include "qapi/qapi-visit-machine.h" + #include "qapi/visitor.h" + #include "sysemu/qtest.h" +-#include "sysemu/whpx.h" + #include "sysemu/numa.h" +-#include "sysemu/replay.h" +-#include "sysemu/sysemu.h" +-#include "sysemu/cpu-timers.h" +-#include "sysemu/xen.h" + #include "trace.h" + ++#include "hw/acpi/aml-build.h" + #include "hw/i386/x86.h" +-#include "target/i386/cpu.h" + #include "hw/i386/topology.h" +-#include "hw/i386/fw_cfg.h" +-#include "hw/intc/i8259.h" +-#include "hw/rtc/mc146818rtc.h" +-#include "target/i386/sev.h" + +-#include "hw/acpi/cpu_hotplug.h" +-#include "hw/irq.h" + #include "hw/nmi.h" +-#include "hw/loader.h" +-#include "multiboot.h" +-#include "elf.h" +-#include "standard-headers/asm-x86/bootparam.h" +-#include CONFIG_DEVICES + #include "kvm/kvm_i386.h" + +-#ifdef CONFIG_XEN_EMU +-#include "hw/xen/xen.h" +-#include "hw/i386/kvm/xen_evtchn.h" +-#endif + +-/* Physical Address of PVH entry point read from kernel ELF NOTE */ +-static size_t pvh_start_addr; +- +-static void init_topo_info(X86CPUTopoInfo *topo_info, +- const X86MachineState *x86ms) ++void init_topo_info(X86CPUTopoInfo *topo_info, ++ const X86MachineState *x86ms) + { + MachineState *ms = MACHINE(x86ms); + +@@ -94,355 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + return x86_apicid_from_cpu_idx(&topo_info, cpu_index); + } + +- +-void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +-{ +- Object *cpu = object_new(MACHINE(x86ms)->cpu_type); +- +- if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { +- goto out; +- } +- qdev_realize(DEVICE(cpu), NULL, errp); +- +-out: +- object_unref(cpu); +-} +- +-void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +-{ +- int i; +- const CPUArchIdList *possible_cpus; +- MachineState *ms = MACHINE(x86ms); +- MachineClass *mc = MACHINE_GET_CLASS(x86ms); +- +- x86_cpu_set_default_version(default_cpu_version); +- 
+- /* +- * Calculates the limit to CPU APIC ID values +- * +- * Limit for the APIC ID value, so that all +- * CPU APIC IDs are < x86ms->apic_id_limit. +- * +- * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). +- */ +- x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, +- ms->smp.max_cpus - 1) + 1; +- +- /* +- * Can we support APIC ID 255 or higher? With KVM, that requires +- * both in-kernel lapic and X2APIC userspace API. +- * +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && x86ms->apic_id_limit > 255 && +- kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { +- error_report("current -smp configuration requires kernel " +- "irqchip and X2APIC API support."); +- exit(EXIT_FAILURE); +- } +- +- if (kvm_enabled()) { +- kvm_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- if (!kvm_irqchip_in_kernel()) { +- apic_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- possible_cpus = mc->possible_cpu_arch_ids(ms); +- for (i = 0; i < ms->smp.cpus; i++) { +- x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); +- } +-} +- +-void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +-{ +- MC146818RtcState *rtc = MC146818_RTC(s); +- +- if (cpus_count > 0xff) { +- /* +- * If the number of CPUs can't be represented in 8 bits, the +- * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just +- * to make old BIOSes fail more predictably. 
+- */ +- mc146818rtc_set_cmos_data(rtc, 0x5f, 0); +- } else { +- mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); +- } +-} +- +-static int x86_apic_cmp(const void *a, const void *b) +-{ +- CPUArchId *apic_a = (CPUArchId *)a; +- CPUArchId *apic_b = (CPUArchId *)b; +- +- return apic_a->arch_id - apic_b->arch_id; +-} +- +-/* +- * returns pointer to CPUArchId descriptor that matches CPU's apic_id +- * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no +- * entry corresponding to CPU's apic_id returns NULL. +- */ +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +-{ +- CPUArchId apic_id, *found_cpu; +- +- apic_id.arch_id = id; +- found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, +- ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), +- x86_apic_cmp); +- if (found_cpu && idx) { +- *idx = found_cpu - ms->possible_cpus->cpus; +- } +- return found_cpu; +-} +- +-void x86_cpu_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (x86ms->acpi_dev) { +- hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- } +- +- /* increment the number of CPUs */ +- x86ms->boot_cpus++; +- if (x86ms->rtc) { +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- } +- if (x86ms->fw_cfg) { +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = CPU(dev); +-out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx = -1; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (!x86ms->acpi_dev) { +- error_setg(errp, "CPU hot unplug not supported without ACPI"); +- return; +- } +- +- 
x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- assert(idx != -1); +- if (idx == 0) { +- error_setg(errp, "Boot CPU is unpluggable"); +- return; +- } +- +- hotplug_handler_unplug_request(x86ms->acpi_dev, dev, +- errp); +-} +- +-void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = NULL; +- qdev_unrealize(dev); +- +- /* decrement the number of CPUs */ +- x86ms->boot_cpus--; +- /* Update the number of CPUs in CMOS */ +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx; +- CPUState *cs; +- CPUArchId *cpu_slot; +- X86CPUTopoIDs topo_ids; +- X86CPU *cpu = X86_CPU(dev); +- CPUX86State *env = &cpu->env; +- MachineState *ms = MACHINE(hotplug_dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- unsigned int smp_cores = ms->smp.cores; +- unsigned int smp_threads = ms->smp.threads; +- X86CPUTopoInfo topo_info; +- +- if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { +- error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +- ms->cpu_type); +- return; +- } +- +- if (x86ms->acpi_dev) { +- Error *local_err = NULL; +- +- hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, +- &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- } +- +- init_topo_info(&topo_info, x86ms); +- +- env->nr_dies = ms->smp.dies; +- +- /* +- * If APIC ID is not set, +- * set it based on socket/die/core/thread properties. 
+- */ +- if (cpu->apic_id == UNASSIGNED_APIC_ID) { +- int max_socket = (ms->smp.max_cpus - 1) / +- smp_threads / smp_cores / ms->smp.dies; +- +- /* +- * die-id was optional in QEMU 4.0 and older, so keep it optional +- * if there's only one die per socket. +- */ +- if (cpu->die_id < 0 && ms->smp.dies == 1) { +- cpu->die_id = 0; +- } +- +- if (cpu->socket_id < 0) { +- error_setg(errp, "CPU socket-id is not set"); +- return; +- } else if (cpu->socket_id > max_socket) { +- error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", +- cpu->socket_id, max_socket); +- return; +- } +- if (cpu->die_id < 0) { +- error_setg(errp, "CPU die-id is not set"); +- return; +- } else if (cpu->die_id > ms->smp.dies - 1) { +- error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", +- cpu->die_id, ms->smp.dies - 1); +- return; +- } +- if (cpu->core_id < 0) { +- error_setg(errp, "CPU core-id is not set"); +- return; +- } else if (cpu->core_id > (smp_cores - 1)) { +- error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", +- cpu->core_id, smp_cores - 1); +- return; +- } +- if (cpu->thread_id < 0) { +- error_setg(errp, "CPU thread-id is not set"); +- return; +- } else if (cpu->thread_id > (smp_threads - 1)) { +- error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", +- cpu->thread_id, smp_threads - 1); +- return; +- } +- +- topo_ids.pkg_id = cpu->socket_id; +- topo_ids.die_id = cpu->die_id; +- topo_ids.core_id = cpu->core_id; +- topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); +- } +- +- cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- if (!cpu_slot) { +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- error_setg(errp, +- "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" +- " APIC ID %" PRIu32 ", valid index range 0:%d", +- topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, +- cpu->apic_id, ms->possible_cpus->len - 1); +- return; 
+- } +- +- if (cpu_slot->cpu) { +- error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", +- idx, cpu->apic_id); +- return; +- } +- +- /* if 'address' properties socket-id/core-id/thread-id are not set, set them +- * so that machine_query_hotpluggable_cpus would show correct values +- */ +- /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() +- * once -smp refactoring is complete and there will be CPU private +- * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { +- error_setg(errp, "property socket-id: %u doesn't match set apic-id:" +- " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +- topo_ids.pkg_id); +- return; +- } +- cpu->socket_id = topo_ids.pkg_id; +- +- if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { +- error_setg(errp, "property die-id: %u doesn't match set apic-id:" +- " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); +- return; +- } +- cpu->die_id = topo_ids.die_id; +- +- if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { +- error_setg(errp, "property core-id: %u doesn't match set apic-id:" +- " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, +- topo_ids.core_id); +- return; +- } +- cpu->core_id = topo_ids.core_id; +- +- if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { +- error_setg(errp, "property thread-id: %u doesn't match set apic-id:" +- " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, +- topo_ids.smt_id); +- return; +- } +- cpu->thread_id = topo_ids.smt_id; +- +- /* +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && +- !kvm_hv_vpindex_settable()) { +- error_setg(errp, "kernel 
doesn't allow setting HyperV VP_INDEX"); +- return; +- } +- +- cs = CPU(cpu); +- cs->cpu_index = idx; +- +- numa_cpu_pre_plug(cpu_slot, dev, errp); +-} +- + static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { +@@ -528,676 +152,6 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) + } + } + +-static long get_file_size(FILE *f) +-{ +- long where, size; +- +- /* XXX: on Unix systems, using fstat() probably makes more sense */ +- +- where = ftell(f); +- fseek(f, 0, SEEK_END); +- size = ftell(f); +- fseek(f, where, SEEK_SET); +- +- return size; +-} +- +-/* TSC handling */ +-uint64_t cpu_get_tsc(CPUX86State *env) +-{ +- return cpus_get_elapsed_ticks(); +-} +- +-/* IRQ handling */ +-static void pic_irq_request(void *opaque, int irq, int level) +-{ +- CPUState *cs = first_cpu; +- X86CPU *cpu = X86_CPU(cs); +- +- trace_x86_pic_interrupt(irq, level); +- if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && +- !whpx_apic_in_platform()) { +- CPU_FOREACH(cs) { +- cpu = X86_CPU(cs); +- if (apic_accept_pic_intr(cpu->apic_state)) { +- apic_deliver_pic_intr(cpu->apic_state, level); +- } +- } +- } else { +- if (level) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); +- } +- } +-} +- +-qemu_irq x86_allocate_cpu_irq(void) +-{ +- return qemu_allocate_irq(pic_irq_request, NULL, 0); +-} +- +-int cpu_get_pic_interrupt(CPUX86State *env) +-{ +- X86CPU *cpu = env_archcpu(env); +- int intno; +- +- if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { +- intno = apic_get_interrupt(cpu->apic_state); +- if (intno >= 0) { +- return intno; +- } +- /* read the irq from the PIC */ +- if (!apic_accept_pic_intr(cpu->apic_state)) { +- return -1; +- } +- } +- +- intno = pic_read_irq(isa_pic); +- return intno; +-} +- +-DeviceState *cpu_get_current_apic(void) +-{ +- if (current_cpu) { +- X86CPU *cpu = X86_CPU(current_cpu); +- return cpu->apic_state; +- } else { +- return 
NULL; +- } +-} +- +-void gsi_handler(void *opaque, int n, int level) +-{ +- GSIState *s = opaque; +- +- trace_x86_gsi_interrupt(n, level); +- switch (n) { +- case 0 ... ISA_NUM_IRQS - 1: +- if (s->i8259_irq[n]) { +- /* Under KVM, Kernel will forward to both PIC and IOAPIC */ +- qemu_set_irq(s->i8259_irq[n], level); +- } +- /* fall through */ +- case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +-#ifdef CONFIG_XEN_EMU +- /* +- * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC +- * routing actually works properly under Xen). And then to +- * *either* the PIRQ handling or the I/OAPIC depending on +- * whether the former wants it. +- */ +- if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { +- break; +- } +-#endif +- qemu_set_irq(s->ioapic_irq[n], level); +- break; +- case IO_APIC_SECONDARY_IRQBASE +- ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: +- qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); +- break; +- } +-} +- +-void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- assert(parent); +- if (kvm_ioapic_in_kernel()) { +- dev = qdev_new(TYPE_KVM_IOAPIC); +- } else { +- dev = qdev_new(TYPE_IOAPIC); +- } +- object_property_add_child(parent, "ioapic", OBJECT(dev)); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); +- } +-} +- +-DeviceState *ioapic_init_secondary(GSIState *gsi_state) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- dev = qdev_new(TYPE_IOAPIC); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); +- } +- return dev; +-} +- +-/* +- * The entry point into the kernel for PVH 
boot is different from +- * the native entry point. The PVH entry is defined by the x86/HVM +- * direct boot ABI and is available in an ELFNOTE in the kernel binary. +- * +- * This function is passed to load_elf() when it is called from +- * load_elfboot() which then additionally checks for an ELF Note of +- * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to +- * parse the PVH entry address from the ELF Note. +- * +- * Due to trickery in elf_opts.h, load_elf() is actually available as +- * load_elf32() or load_elf64() and this routine needs to be able +- * to deal with being called as 32 or 64 bit. +- * +- * The address of the PVH entry point is saved to the 'pvh_start_addr' +- * global variable. (although the entry point is 32-bit, the kernel +- * binary can be either 32-bit or 64-bit). +- */ +-static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +-{ +- size_t *elf_note_data_addr; +- +- /* Check if ELF Note header passed in is valid */ +- if (arg1 == NULL) { +- return 0; +- } +- +- if (is64) { +- struct elf64_note *nhdr64 = (struct elf64_note *)arg1; +- uint64_t nhdr_size64 = sizeof(struct elf64_note); +- uint64_t phdr_align = *(uint64_t *)arg2; +- uint64_t nhdr_namesz = nhdr64->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr64) + nhdr_size64 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *elf_note_data_addr; +- } else { +- struct elf32_note *nhdr32 = (struct elf32_note *)arg1; +- uint32_t nhdr_size32 = sizeof(struct elf32_note); +- uint32_t phdr_align = *(uint32_t *)arg2; +- uint32_t nhdr_namesz = nhdr32->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr32) + nhdr_size32 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *(uint32_t *)elf_note_data_addr; +- } +- +- return pvh_start_addr; +-} +- +-static bool load_elfboot(const char *kernel_filename, +- int kernel_file_size, +- uint8_t *header, +- size_t pvh_xen_start_addr, +- FWCfgState *fw_cfg) +-{ +- uint32_t flags = 0; +- uint32_t 
mh_load_addr = 0; +- uint32_t elf_kernel_size = 0; +- uint64_t elf_entry; +- uint64_t elf_low, elf_high; +- int kernel_size; +- +- if (ldl_p(header) != 0x464c457f) { +- return false; /* no elfboot */ +- } +- +- bool elf_is64 = header[EI_CLASS] == ELFCLASS64; +- flags = elf_is64 ? +- ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; +- +- if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ +- error_report("elfboot unsupported flags = %x", flags); +- exit(1); +- } +- +- uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; +- kernel_size = load_elf(kernel_filename, read_pvh_start_addr, +- NULL, &elf_note_type, &elf_entry, +- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, +- 0, 0); +- +- if (kernel_size < 0) { +- error_report("Error while loading elf kernel"); +- exit(1); +- } +- mh_load_addr = elf_low; +- elf_kernel_size = elf_high - elf_low; +- +- if (pvh_start_addr == 0) { +- error_report("Error loading uncompressed kernel without PVH ELF Note"); +- exit(1); +- } +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); +- +- return true; +-} +- +-void x86_load_linux(X86MachineState *x86ms, +- FWCfgState *fw_cfg, +- int acpi_data_size, +- bool pvh_enabled) +-{ +- bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; +- uint16_t protocol; +- int setup_size, kernel_size, cmdline_size; +- int dtb_size, setup_data_offset; +- uint32_t initrd_max; +- uint8_t header[8192], *setup, *kernel; +- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; +- FILE *f; +- char *vmode; +- MachineState *machine = MACHINE(x86ms); +- struct setup_data *setup_data; +- const char *kernel_filename = machine->kernel_filename; +- const char *initrd_filename = machine->initrd_filename; +- const char *dtb_filename = machine->dtb; +- const char *kernel_cmdline = machine->kernel_cmdline; +- SevKernelLoaderContext 
sev_load_ctx = {}; +- +- /* Align to 16 bytes as a paranoia measure */ +- cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; +- +- /* load the kernel header */ +- f = fopen(kernel_filename, "rb"); +- if (!f) { +- fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- kernel_size = get_file_size(f); +- if (!kernel_size || +- fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != +- MIN(ARRAY_SIZE(header), kernel_size)) { +- fprintf(stderr, "qemu: could not load kernel '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- /* kernel protocol version */ +- if (ldl_p(header + 0x202) == 0x53726448) { +- protocol = lduw_p(header + 0x206); +- } else { +- /* +- * This could be a multiboot kernel. If it is, let's stop treating it +- * like a Linux kernel. +- * Note: some multiboot images could be in the ELF format (the same of +- * PVH), so we try multiboot first since we check the multiboot magic +- * header before to load it. +- */ +- if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, +- kernel_cmdline, kernel_size, header)) { +- return; +- } +- /* +- * Check if the file is an uncompressed kernel file (ELF) and load it, +- * saving the PVH entry point used by the x86/HVM direct boot ABI. +- * If load_elfboot() is successful, populate the fw_cfg info. 
+- */ +- if (pvh_enabled && +- load_elfboot(kernel_filename, kernel_size, +- header, pvh_start_addr, fw_cfg)) { +- fclose(f); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, +- strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, +- header, sizeof(header)); +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." 
+- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, +- initrd_size); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "pvh.bin"; +- nb_option_roms++; +- +- return; +- } +- protocol = 0; +- } +- +- if (protocol < 0x200 || !(header[0x211] & 0x01)) { +- /* Low kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x10000; +- } else if (protocol < 0x202) { +- /* High but ancient kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x100000; +- } else { +- /* High and recent kernel */ +- real_addr = 0x10000; +- cmdline_addr = 0x20000; +- prot_addr = 0x100000; +- } +- +- /* highest address for loading the initrd */ +- if (protocol >= 0x20c && +- lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { +- /* +- * Linux has supported initrd up to 4 GB for a very long time (2007, +- * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), +- * though it only sets initrd_max to 2 GB to "work around bootloader +- * bugs". Luckily, QEMU firmware(which does something like bootloader) +- * has supported this. +- * +- * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can +- * be loaded into any address. +- * +- * In addition, initrd_max is uint32_t simply because QEMU doesn't +- * support the 64-bit boot protocol (specifically the ext_ramdisk_image +- * field). +- * +- * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
+- */ +- initrd_max = UINT32_MAX; +- } else if (protocol >= 0x203) { +- initrd_max = ldl_p(header + 0x22c); +- } else { +- initrd_max = 0x37ffffff; +- } +- +- if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- sev_load_ctx.cmdline_data = (char *)kernel_cmdline; +- sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; +- +- if (protocol >= 0x202) { +- stl_p(header + 0x228, cmdline_addr); +- } else { +- stw_p(header + 0x20, 0xA33F); +- stw_p(header + 0x22, cmdline_addr - real_addr); +- } +- +- /* handle vga= parameter */ +- vmode = strstr(kernel_cmdline, "vga="); +- if (vmode) { +- unsigned int video_mode; +- const char *end; +- int ret; +- /* skip "vga=" */ +- vmode += 4; +- if (!strncmp(vmode, "normal", 6)) { +- video_mode = 0xffff; +- } else if (!strncmp(vmode, "ext", 3)) { +- video_mode = 0xfffe; +- } else if (!strncmp(vmode, "ask", 3)) { +- video_mode = 0xfffd; +- } else { +- ret = qemu_strtoui(vmode, &end, 0, &video_mode); +- if (ret != 0 || (*end && *end != ' ')) { +- fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); +- exit(1); +- } +- } +- stw_p(header + 0x1fa, video_mode); +- } +- +- /* loader type */ +- /* +- * High nybble = B reserved for QEMU; low nybble is revision number. +- * If this code is substantially changed, you may want to consider +- * incrementing the revision. 
+- */ +- if (protocol >= 0x200) { +- header[0x210] = 0xB0; +- } +- /* heap */ +- if (protocol >= 0x201) { +- header[0x211] |= 0x80; /* CAN_USE_HEAP */ +- stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); +- } +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- if (protocol < 0x200) { +- fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); +- exit(1); +- } +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." +- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); +- sev_load_ctx.initrd_data = initrd_data; +- sev_load_ctx.initrd_size = initrd_size; +- +- stl_p(header + 0x218, initrd_addr); +- stl_p(header + 0x21c, initrd_size); +- } +- +- /* load kernel and setup */ +- setup_size = header[0x1f1]; +- if (setup_size == 0) { +- setup_size = 4; +- } +- setup_size = (setup_size + 1) * 512; +- if (setup_size > kernel_size) { +- fprintf(stderr, "qemu: invalid kernel header\n"); +- exit(1); +- } +- kernel_size -= setup_size; +- +- setup = g_malloc(setup_size); +- kernel = g_malloc(kernel_size); +- fseek(f, 0, SEEK_SET); +- if (fread(setup, 1, setup_size, f) != setup_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- fclose(f); +- +- /* append dtb to kernel */ +- if (dtb_filename) { +- if (protocol < 0x209) { +- fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); +- exit(1); +- } +- +- dtb_size = get_image_size(dtb_filename); +- if (dtb_size <= 0) { +- fprintf(stderr, "qemu: error reading dtb %s: %s\n", +- dtb_filename, strerror(errno)); +- exit(1); +- } +- +- setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); +- kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; +- kernel = g_realloc(kernel, kernel_size); +- +- stq_p(header + 0x250, prot_addr + setup_data_offset); +- +- setup_data = (struct setup_data *)(kernel + setup_data_offset); +- setup_data->next = 0; +- setup_data->type = cpu_to_le32(SETUP_DTB); +- setup_data->len = cpu_to_le32(dtb_size); +- +- load_image_size(dtb_filename, setup_data->data, dtb_size); +- } +- +- /* +- * If we're starting an encrypted VM, it will be OVMF based, which uses the +- * efi stub for booting and doesn't require any values to be placed in the +- * kernel header. We therefore don't update the header so the hash of the +- * kernel on the other side of the fw_cfg interface matches the hash of the +- * file the user passed in. 
+- */ +- if (!sev_enabled()) { +- memcpy(setup, header, MIN(sizeof(header), setup_size)); +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); +- sev_load_ctx.kernel_data = (char *)kernel; +- sev_load_ctx.kernel_size = kernel_size; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); +- sev_load_ctx.setup_data = (char *)setup; +- sev_load_ctx.setup_size = setup_size; +- +- if (sev_enabled()) { +- sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "linuxboot.bin"; +- if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { +- option_rom[nb_option_roms].name = "linuxboot_dma.bin"; +- } +- nb_option_roms++; +-} +- +-void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, +- MemoryRegion *bios, bool read_only) +-{ +- uint64_t bios_size = memory_region_size(bios); +- uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); +- +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, +- isa_bios, 1); +- memory_region_set_readonly(isa_bios, read_only); +-} +- +-void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, +- MemoryRegion *rom_memory, bool isapc_ram_fw) +-{ +- const char *bios_name; +- char *filename; +- int bios_size; +- ssize_t ret; +- +- /* BIOS load */ +- bios_name = MACHINE(x86ms)->firmware ?: default_firmware; +- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); +- if (filename) { +- bios_size = get_image_size(filename); +- } else { +- bios_size = -1; +- } +- if (bios_size <= 0 || +- (bios_size % 65536) != 0) { +- goto 
bios_error; +- } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); +- if (sev_enabled()) { +- /* +- * The concept of a "reset" simply doesn't exist for +- * confidential computing guests, we have to destroy and +- * re-launch them instead. So there is no need to register +- * the firmware as rom to properly re-initialize on reset. +- * Just go for a straight file load instead. +- */ +- void *ptr = memory_region_get_ram_ptr(&x86ms->bios); +- load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); +- } else { +- memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); +- ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +- if (ret != 0) { +- goto bios_error; +- } +- } +- g_free(filename); +- +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); +- +- /* map all the bios at the top of memory */ +- memory_region_add_subregion(rom_memory, +- (uint32_t)(-bios_size), +- &x86ms->bios); +- return; +- +-bios_error: +- fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); +- exit(1); +-} +- + bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) + { + bool smm_available = false; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index c2062db13f..b006f16b8d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -21,6 +21,7 @@ + #include "exec/memory.h" + + #include "hw/boards.h" ++#include "hw/i386/topology.h" + #include "hw/intc/ioapic.h" + #include "hw/isa/isa.h" + #include "qom/object.h" +@@ -109,12 +110,11 @@ struct X86MachineState { + #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") + OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) + +-uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, ++void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); ++uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned 
int cpu_index); + +-void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch new file mode 100644 index 0000000..38fd870 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch @@ -0,0 +1,133 @@ +From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:04 +0200 +Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an +"isa_bios" attribute to X86MachineState. 
+ +Suggested-by: Philippe Mathieu-Daudé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-4-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 7 +++---- + hw/i386/x86.c | 9 ++++----- + include/hw/i386/x86.h | 7 +++++++ + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 59c7a81692..82d37cb376 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,11 +40,10 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *rom_memory, ++static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + MemoryRegion *flash_mem) + { + int isa_bios_size; +- MemoryRegion *isa_bios; + uint64_t flash_size; + void *flash_ptr, *isa_bios_ptr; + +@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + memory_region_add_subregion_overlap(rom_memory, +@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) + static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { ++ X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem); ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 6d3c72f124..457e8a34a5 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c 
+@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios, *isa_bios; ++ MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +- isa_bios, ++ &x86ms->isa_bios, + 1); +- memory_region_set_readonly(isa_bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index cb07618d19..a07de79167 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -18,6 +18,7 @@ + #define HW_I386_X86_H + + #include "exec/hwaddr.h" ++#include "exec/memory.h" + + #include "hw/boards.h" + #include "hw/intc/ioapic.h" +@@ -52,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address ++ * boundary. 
++ */ ++ MemoryRegion isa_bios; ++ + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch new file mode 100644 index 0000000..7a61f95 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch @@ -0,0 +1,105 @@ +From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:05 +0200 +Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to +X86MachineState. Note that it is only used in the -bios case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-5-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 13 ++++++------- + include/hw/i386/x86.h | 6 ++++++ + 2 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 457e8a34a5..29167de97d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- bios = g_malloc(sizeof(*bios)); +- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. 
+ */ +- void *ptr = memory_region_get_ram_ptr(bios); ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- memory_region_set_readonly(bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), +- bios); ++ &x86ms->bios); + return; + + bios_error: +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index a07de79167..55c6809ae0 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -53,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used ++ * in the ROM (-bios) case. ++ */ ++ MemoryRegion bios; ++ + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch new file mode 100644 index 0000000..b9c18e7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch @@ -0,0 +1,69 @@ +From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:38 +0200 +Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm) + +Given that memory_region_set_readonly() is a no-op when the readonlyness is +already as requested it is possible to simplify the pattern + + if (condition) { + foo(true); + } + +to + + foo(condition); + +which is shorter and allows to see the invariant of the code more easily. 
+ +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 3d5b51e92d..2a4f3ee285 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- if (!isapc_ram_fw) { +- memory_region_set_readonly(bios, true); +- } ++ memory_region_set_readonly(bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + 0x100000 - isa_bios_size, + isa_bios, + 1); +- if (!isapc_ram_fw) { +- memory_region_set_readonly(isa_bios, true); +- } ++ memory_region_set_readonly(isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch new file mode 100644 index 0000000..6ce9c72 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch @@ -0,0 +1,98 @@ +From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:06 +0200 +Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm) + +The function is inspired by pc_isa_bios_init() and should eventually replace it. +Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands +to work in the isa-bios region. + +While at it convert the magic number 0x100000 (== 1MiB) to increase readability. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-6-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 25 ++++++++++++++++--------- + include/hw/i386/x86.h | 2 ++ + 2 files changed, 18 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 29167de97d..c61f4ebfa6 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; + char *filename; +- int bios_size, isa_bios_size; ++ int bios_size; + ssize_t ret; + + /* BIOS load */ +@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ +- 
isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(rom_memory, +- 0x100000 - isa_bios_size, +- &x86ms->isa_bios, +- 1); +- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 55c6809ae0..d7b7d3f3ce 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only); + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch deleted file mode 100644 index f850765..0000000 --- a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 15:03:55 +0100 -Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm) - -When the legacy and iommufd backends were introduced, a set of common -vfio-pci routines were 
exported in pci.c for both backends to use : - - vfio_pci_pre_reset - vfio_pci_get_pci_hot_reset_info - vfio_pci_host_match - vfio_pci_post_reset - -This introduced a build failure on PPC when --without-default-devices -is use because VFIO is always selected in ppc/Kconfig but VFIO_PCI is -not. - -Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the -VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with -CONFIG_VFIO_PCI. - -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42) -Signed-off-by: Eric Auger ---- - hw/ppc/Kconfig | 2 +- - hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 37 insertions(+), 1 deletion(-) - -diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig -index 56f0475a8e..44263a58c4 100644 ---- a/hw/ppc/Kconfig -+++ b/hw/ppc/Kconfig -@@ -3,11 +3,11 @@ config PSERIES - imply PCI_DEVICES - imply TEST_DEVICES - imply VIRTIO_VGA -+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c - select NVDIMM - select DIMM - select PCI - select SPAPR_VSCSI -- select VFIO if LINUX # needed by spapr_pci_vfio.c - select XICS - select XIVE - select MSI_NONBROKEN -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index d1d07bec46..76b2a3487b 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -26,10 +26,12 @@ - #include "hw/pci/pci_device.h" - #include "hw/vfio/vfio-common.h" - #include "qemu/error-report.h" -+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ - - /* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -+#ifdef CONFIG_VFIO_PCI - static bool vfio_eeh_container_ok(VFIOContainer *container) - { - /* -@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) - - return RTAS_OUT_SUCCESS; - } -+ -+#else -+ -+bool spapr_phb_eeh_available(SpaprPhbState *sphb) -+{ -+ return false; -+} -+ -+void spapr_phb_vfio_reset(DeviceState *qdev) -+{ -+} 
-+ -+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, -+ unsigned int addr, int option) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+#endif /* CONFIG_VFIO_PCI */ --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch deleted file mode 100644 index 2c7f6ff..0000000 --- a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Volker=20R=C3=BCmelin?= -Date: Fri, 29 Dec 2023 21:38:54 +0100 -Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm) - -Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list") -introduced a global VFIODevice list, but forgot to update the list -element field name when iterating over the new list. Change the code -to use the correct list element field. 
- -Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list") -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061 -Signed-off-by: Volker Rümelin -Reviewed-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 0d4d8b8416..0b3352f2a9 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -73,7 +73,7 @@ bool vfio_mig_active(void) - return false; - } - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->migration_blocker) { - return false; - } -@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void) - unsigned int device_num = 0; - bool all_support_p2p = true; - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->migration) { - device_num++; - -@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque) - { - VFIODevice *vbasedev; - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->dev->realized) { - vbasedev->ops->vfio_compute_needs_reset(vbasedev); - } - } - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->dev->realized && vbasedev->needs_reset) { - vbasedev->ops->vfio_hot_reset_multi(vbasedev); - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch new file mode 100644 index 0000000..7b2e1b6 --- /dev/null +++ b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch @@ -0,0 +1,108 @@ +From 
c554f8768a18ceba173aedbd582c1cae43a41e2c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 18 Jun 2024 14:19:58 +0200 +Subject: [PATCH 1/2] hw/virtio: Fix the de-initialization of vhost-user + devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 255: hw/virtio: Fix the de-initialization of vhost-user devices +RH-Jira: RHEL-40708 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] c7815a249ec135993f45934cab1c1f2c038b80ea (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-40708 + +The unrealize functions of the various vhost-user devices are +calling the corresponding vhost_*_set_status() functions with a +status of 0 to shut down the device correctly. + +Now these vhost_*_set_status() functions all follow this scheme: + + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { + return; + } + + if (should_start) { + /* ... do the initialization stuff ... */ + } else { + /* ... do the cleanup stuff ... */ + } + +The problem here is virtio_device_should_start(vdev, 0) currently +always returns "true" since it internally only looks at vdev->started +instead of looking at the "status" parameter. Thus once the device +got started once, virtio_device_should_start() always returns true +and thus the vhost_*_set_status() functions return early, without +ever doing any clean-up when being called with status == 0. This +causes e.g. problems when trying to hot-plug and hot-unplug a vhost +user devices multiple times since the de-initialization step is +completely skipped during the unplug operation. 
+ +This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move +vm_running check to virtio_device_started") which replaced + + should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + +with + + should_start = virtio_device_started(vdev, status); + +which later got replaced by virtio_device_should_start(). This blocked +the possibility to set should_start to false in case the status flag +VIRTIO_CONFIG_S_DRIVER_OK was not set. + +Fix it by adjusting the virtio_device_should_start() function to +only consider the status flag instead of vdev->started. Since this +function is only used in the various vhost_*_set_status() functions +for exactly the same purpose, it should be fine to fix it in this +central place there without any risk to change the behavior of other +code. + +Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started") +Buglink: https://issues.redhat.com/browse/RHEL-40708 +Signed-off-by: Thomas Huth +Message-Id: <20240618121958.88673-1-thuth@redhat.com> +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16) +Signed-off-by: Thomas Huth +--- + include/hw/virtio/virtio.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7d5ffdc145..2eafad17b8 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) + * @vdev - the VirtIO device + * @status - the devices status bits + * +- * This is similar to virtio_device_started() but also encapsulates a +- * check on the VM status which would prevent a device starting +- * anyway. ++ * This is similar to virtio_device_started() but ignores vdev->started ++ * and also encapsulates a check on the VM status which would prevent a ++ * device from starting anyway. 
+ */ + static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) + { +@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status + return false; + } + +- return virtio_device_started(vdev, status); ++ return status & VIRTIO_CONFIG_S_DRIVER_OK; + } + + static inline void virtio_set_started(VirtIODevice *vdev, bool started) +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch new file mode 100644 index 0000000..8f69f9e --- /dev/null +++ b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch @@ -0,0 +1,68 @@ +From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:39 +0200 +Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends + on other components + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm) + +fw_cfg.c and vapic.c are currently included unconditionally but +depend on other components. vapic.c depends on the local APIC, +while fw_cfg.c includes a piece of AML builder code that depends +on CONFIG_ACPI. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-9-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/fw_cfg.c | 2 ++ + hw/i386/meson.build | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 283c3f4c16..7f97d40616 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + } + ++#ifdef CONFIG_ACPI + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + { + /* +@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } ++#endif +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d8b70ef3e9..d9da676038 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -1,12 +1,12 @@ + i386_ss = ss.source_set() + i386_ss.add(files( + 'fw_cfg.c', +- 'vapic.c', + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', + )) + ++i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) + i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + if_false: files('x86-iommu-stub.c')) + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch new file mode 100644 index 0000000..31a7e92 --- /dev/null +++ b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch @@ -0,0 +1,40 @@ +From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:23 -0500 +Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm) + +SNP guests will rely on this bit to determine certain feature support. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 489c853b42..13737cd703 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; ++ *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch new file mode 100644 index 0000000..fd604d2 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch @@ -0,0 +1,145 @@ +From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:42 -0500 +Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for + KVM_HC_MAP_GPA_RANGE + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm) + +KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for +private/shared 
memory attribute updates requested by the guest. +Implement handling for that use-case along with some basic +infrastructure for enabling specific hypercall events. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + target/i386/kvm/trace-events | 1 + + 3 files changed, 57 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 75e75d9772..2935e3931a 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -21,6 +21,7 @@ + #include + + #include ++#include + #include "standard-headers/asm-x86/kvm_para.h" + #include "hw/xen/interface/arch-x86/cpuid.h" + +@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms) + return kvm_type; + } + ++bool kvm_enable_hypercall(uint64_t enable_mask) ++{ ++ KVMState *s = KVM_STATE(current_accel()); ++ ++ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void) + return ecx & CPUID_EXT_VMX; + } + ++/* ++ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE ++ * to service guest-initiated memory attribute update requests so that ++ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be ++ * backed by the private memory pool provided by guest_memfd, and as such ++ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). ++ * ++ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live ++ * migration, are not implemented here currently. 
++ * ++ * For the guest_memfd use-case, these exits will generally be synthesized ++ * by KVM based on platform-specific hypercalls, like GHCB requests in the ++ * case of SEV-SNP, and not issued directly within the guest though the ++ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is ++ * not actually advertised to guests via the KVM CPUID feature bit, as ++ * opposed to SEV live migration where it would be. Since it is unlikely the ++ * SEV live migration use-case would be useful for guest-memfd backed guests, ++ * because private/shared page tracking is already provided through other ++ * means, these 2 use-cases should be treated as being mutually-exclusive. ++ */ ++static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) ++{ ++ uint64_t gpa, size, attributes; ++ ++ if (!machine_require_guest_memfd(current_machine)) ++ return -EINVAL; ++ ++ gpa = run->hypercall.args[0]; ++ size = run->hypercall.args[1] * TARGET_PAGE_SIZE; ++ attributes = run->hypercall.args[2]; ++ ++ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); ++ ++ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); ++} ++ ++static int kvm_handle_hypercall(struct kvm_run *run) ++{ ++ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) ++ return kvm_handle_hc_map_gpa_range(run); ++ ++ return -EINVAL; ++} ++ + #define VMX_INVALID_GUEST_STATE 0x80000021 + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; + #endif ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_handle_hypercall(run); ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 6b44844d95..34fc60774b 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -33,6 +33,7 @@ + bool 
kvm_has_smm(void); + bool kvm_enable_x2apic(void); + bool kvm_hv_vpindex_settable(void); ++bool kvm_enable_hypercall(uint64_t enable_mask); + + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events +index b365a8e8e2..74a6234ff7 100644 +--- a/target/i386/kvm/trace-events ++++ b/target/i386/kvm/trace-events +@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" + kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" + kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" + kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" ++kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 + + # xen-emu.c + kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch new file mode 100644 index 0000000..4b91e93 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch @@ -0,0 +1,536 @@ +From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 29 Feb 2024 01:36:43 -0500 +Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to + separate helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm) + +Move the architectural (for lack of a 
better term) CPUID leaf generation +to a separate helper so that the generation code can be reused by TDX, +which needs to generate a canonical VM-scoped configuration. + +For now this is just a cleanup, so keep the function static. + +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> +Reviewed-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 417 +++++++++++++++++++++--------------------- + 1 file changed, 211 insertions(+), 206 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 739f33db47..5f30b649a0 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env) + } + } + +-int kvm_arch_init_vcpu(CPUState *cs) ++static uint32_t kvm_x86_build_cpuid(CPUX86State *env, ++ struct kvm_cpuid_entry2 *entries, ++ uint32_t cpuid_i) + { +- struct { +- struct kvm_cpuid2 cpuid; +- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +- } cpuid_data; +- /* +- * The kernel defines these structs with padding fields so there +- * should be no extra padding in our cpuid_data struct. 
+- */ +- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != +- sizeof(struct kvm_cpuid2) + +- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); +- +- X86CPU *cpu = X86_CPU(cs); +- CPUX86State *env = &cpu->env; +- uint32_t limit, i, j, cpuid_i; ++ uint32_t limit, i, j; + uint32_t unused; + struct kvm_cpuid_entry2 *c; +- uint32_t signature[3]; +- int kvm_base = KVM_CPUID_SIGNATURE; +- int max_nested_state_len; +- int r; +- Error *local_err = NULL; +- +- memset(&cpuid_data, 0, sizeof(cpuid_data)); +- +- cpuid_i = 0; +- +- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); +- +- r = kvm_arch_set_tsc_khz(cs); +- if (r < 0) { +- return r; +- } +- +- /* vcpu's TSC frequency is either specified by user, or following +- * the value used by KVM if the former is not present. In the +- * latter case, we query it from KVM and record in env->tsc_khz, +- * so that vcpu's TSC frequency can be migrated later via this field. +- */ +- if (!env->tsc_khz) { +- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? +- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : +- -ENOTSUP; +- if (r > 0) { +- env->tsc_khz = r; +- } +- } +- +- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; +- +- /* +- * kvm_hyperv_expand_features() is called here for the second time in case +- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle +- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to +- * check which Hyper-V enlightenments are supported and which are not, we +- * can still proceed and check/expand Hyper-V enlightenments here so legacy +- * behavior is preserved. 
+- */ +- if (!kvm_hyperv_expand_features(cpu, &local_err)) { +- error_report_err(local_err); +- return -ENOSYS; +- } +- +- if (hyperv_enabled(cpu)) { +- r = hyperv_init_vcpu(cpu); +- if (r) { +- return r; +- } +- +- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); +- kvm_base = KVM_CPUID_SIGNATURE_NEXT; +- has_msr_hv_hypercall = true; +- } +- +- if (cs->kvm_state->xen_version) { +-#ifdef CONFIG_XEN_EMU +- struct kvm_cpuid_entry2 *xen_max_leaf; +- +- memcpy(signature, "XenVMMXenVMM", 12); +- +- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_SIGNATURE; +- c->eax = kvm_base + XEN_CPUID_TIME; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_VENDOR; +- c->eax = cs->kvm_state->xen_version; +- c->ebx = 0; +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM_MSR; +- /* Number of hypercall-transfer pages */ +- c->eax = 1; +- /* Hypercall MSR base address */ +- if (hyperv_enabled(cpu)) { +- c->ebx = XEN_HYPERCALL_MSR_HYPERV; +- kvm_xen_init(cs->kvm_state, c->ebx); +- } else { +- c->ebx = XEN_HYPERCALL_MSR; +- } +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_TIME; +- c->eax = ((!!tsc_is_stable_and_known(env) << 1) | +- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); +- /* default=0 (emulate if necessary) */ +- c->ebx = 0; +- /* guest tsc frequency */ +- c->ecx = env->user_tsc_khz; +- /* guest tsc incarnation (migration count) */ +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM; +- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { +- c->function = kvm_base + XEN_CPUID_HVM; +- +- if (cpu->xen_vapic) { +- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; +- c->eax |= 
XEN_HVM_CPUID_X2APIC_VIRT; +- } +- +- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { +- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; +- c->ebx = cs->cpu_index; +- } +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { +- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; +- } +- } +- +- r = kvm_xen_init_vcpu(cs); +- if (r) { +- return r; +- } +- +- kvm_base += 0x100; +-#else /* CONFIG_XEN_EMU */ +- /* This should never happen as kvm_arch_init() would have died first. */ +- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); +- abort(); +-#endif +- } else if (cpu->expose_kvm) { +- memcpy(signature, "KVMKVMKVM\0\0\0", 12); +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_SIGNATURE | kvm_base; +- c->eax = KVM_CPUID_FEATURES | kvm_base; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_FEATURES | kvm_base; +- c->eax = env->features[FEAT_KVM]; +- c->edx = env->features[FEAT_KVM_HINTS]; +- } + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + +- if (cpu->kvm_pv_enforce_cpuid) { +- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); +- if (r < 0) { +- fprintf(stderr, +- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", +- strerror(-r)); +- abort(); +- } +- } +- + for (i = 0; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported level value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; +- ++ c = &entries[cpuid_i++]; + switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ +@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:2):eax & 0xf = 0x%x\n", times); +- abort(); ++ goto full; + } +- c = 
&cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); +@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x12: +@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x12,ecx:0x%x)\n", j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x7: +@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: +@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + 
break; + default: +@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + c->function = i; + c->flags = 0; +@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + } + ++ return cpuid_i; ++ ++full: ++ fprintf(stderr, "cpuid_data is full, no space for " ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); ++ abort(); ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ struct { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++ } cpuid_data; ++ /* ++ * The kernel defines these structs with padding fields so there ++ * should be no extra padding in our cpuid_data struct. ++ */ ++ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != ++ sizeof(struct kvm_cpuid2) + ++ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); ++ ++ X86CPU *cpu = X86_CPU(cs); ++ CPUX86State *env = &cpu->env; ++ uint32_t cpuid_i; ++ struct kvm_cpuid_entry2 *c; ++ uint32_t signature[3]; ++ int kvm_base = KVM_CPUID_SIGNATURE; ++ int max_nested_state_len; ++ int r; ++ Error *local_err = NULL; ++ ++ memset(&cpuid_data, 0, sizeof(cpuid_data)); ++ ++ cpuid_i = 0; ++ ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ ++ r = kvm_arch_set_tsc_khz(cs); ++ if (r < 0) { ++ return r; ++ } ++ ++ /* vcpu's TSC frequency is either specified by user, or following ++ * the value used by KVM if the former is not present. In the ++ * latter case, we query it from KVM and record in env->tsc_khz, ++ * so that vcpu's TSC frequency can be migrated later via this field. ++ */ ++ if (!env->tsc_khz) { ++ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 
++ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : ++ -ENOTSUP; ++ if (r > 0) { ++ env->tsc_khz = r; ++ } ++ } ++ ++ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; ++ ++ /* ++ * kvm_hyperv_expand_features() is called here for the second time in case ++ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle ++ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to ++ * check which Hyper-V enlightenments are supported and which are not, we ++ * can still proceed and check/expand Hyper-V enlightenments here so legacy ++ * behavior is preserved. ++ */ ++ if (!kvm_hyperv_expand_features(cpu, &local_err)) { ++ error_report_err(local_err); ++ return -ENOSYS; ++ } ++ ++ if (hyperv_enabled(cpu)) { ++ r = hyperv_init_vcpu(cpu); ++ if (r) { ++ return r; ++ } ++ ++ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); ++ kvm_base = KVM_CPUID_SIGNATURE_NEXT; ++ has_msr_hv_hypercall = true; ++ } ++ ++ if (cs->kvm_state->xen_version) { ++#ifdef CONFIG_XEN_EMU ++ struct kvm_cpuid_entry2 *xen_max_leaf; ++ ++ memcpy(signature, "XenVMMXenVMM", 12); ++ ++ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_SIGNATURE; ++ c->eax = kvm_base + XEN_CPUID_TIME; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_VENDOR; ++ c->eax = cs->kvm_state->xen_version; ++ c->ebx = 0; ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM_MSR; ++ /* Number of hypercall-transfer pages */ ++ c->eax = 1; ++ /* Hypercall MSR base address */ ++ if (hyperv_enabled(cpu)) { ++ c->ebx = XEN_HYPERCALL_MSR_HYPERV; ++ kvm_xen_init(cs->kvm_state, c->ebx); ++ } else { ++ c->ebx = XEN_HYPERCALL_MSR; ++ } ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_TIME; ++ c->eax = ((!!tsc_is_stable_and_known(env) << 1) | ++ 
(!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); ++ /* default=0 (emulate if necessary) */ ++ c->ebx = 0; ++ /* guest tsc frequency */ ++ c->ecx = env->user_tsc_khz; ++ /* guest tsc incarnation (migration count) */ ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM; ++ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { ++ c->function = kvm_base + XEN_CPUID_HVM; ++ ++ if (cpu->xen_vapic) { ++ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; ++ c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; ++ } ++ ++ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { ++ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; ++ c->ebx = cs->cpu_index; ++ } ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { ++ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; ++ } ++ } ++ ++ r = kvm_xen_init_vcpu(cs); ++ if (r) { ++ return r; ++ } ++ ++ kvm_base += 0x100; ++#else /* CONFIG_XEN_EMU */ ++ /* This should never happen as kvm_arch_init() would have died first. 
*/ ++ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); ++ abort(); ++#endif ++ } else if (cpu->expose_kvm) { ++ memcpy(signature, "KVMKVMKVM\0\0\0", 12); ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_SIGNATURE | kvm_base; ++ c->eax = KVM_CPUID_FEATURES | kvm_base; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_FEATURES | kvm_base; ++ c->eax = env->features[FEAT_KVM]; ++ c->edx = env->features[FEAT_KVM_HINTS]; ++ } ++ ++ if (cpu->kvm_pv_enforce_cpuid) { ++ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); ++ if (r < 0) { ++ fprintf(stderr, ++ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", ++ strerror(-r)); ++ abort(); ++ } ++ } ++ ++ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); + cpuid_data.cpuid.nent = cpuid_i; + + if (((env->cpuid_version >> 8)&0xF) >= 6 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch new file mode 100644 index 0000000..65d09ab --- /dev/null +++ b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch @@ -0,0 +1,91 @@ +From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:40 +0200 +Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm) + +There is no need to override these fields of MachineClass because they are +already set to the right value in the superclass. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-10-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 3 --- + hw/i386/x86.c | 6 +++--- + include/hw/i386/x86.h | 4 ---- + 3 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 660a59c63b..0aca0cc79e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; +- mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; +- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->has_hotpluggable_cpus = true; +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index c61f4ebfa6..fcef652c1e 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + numa_cpu_pre_plug(cpu_slot, dev, errp); + } + +-CpuInstanceProperties ++static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + return possible_cpus->cpus[cpu_index].props; + } + +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) ++static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + { + X86CPUTopoIDs topo_ids; + X86MachineState *x86ms = X86_MACHINE(ms); +@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +-const CPUArchIdList 
*x86_possible_cpu_arch_ids(MachineState *ms) ++static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + { + X86MachineState *x86ms = X86_MACHINE(ms); + unsigned int max_cpus = ms->smp.max_cpus; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index d7b7d3f3ce..c2062db13f 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, + + void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, +- unsigned cpu_index); +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); + CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch new file mode 100644 index 0000000..fce51aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch @@ -0,0 +1,116 @@ +From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:44:44 +0200 +Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type + for SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm) + +SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, +or KVM_X86_SEV_ES_VM depending on the configuration and what +the host kernel supports. 
SNP guests on the other hand can only +ever use KVM_X86_SNP_VM, so split determination of VM type out +into a separate class method that can be set accordingly for +sev-guest vs. sev-snp-guest objects and add handling for SNP. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> +[Remove unnecessary function pointer declaration. - Paolo] +Signed-off-by: Paolo Bonzini +(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 1 + + target/i386/sev.c | 15 ++++++++++++--- + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 408568d053..75e75d9772 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -166,6 +166,7 @@ static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", ++ [KVM_X86_SNP_VM] = "SEV-SNP", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c3daaf1ad5..072cc4f853 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,6 +885,11 @@ out: + return sev_common->kvm_type; + } + ++static int sev_snp_kvm_type(X86ConfidentialGuest *cg) ++{ ++ return KVM_X86_SNP_VM; ++} ++ + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + char *devname; +@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ++ X86ConfidentialGuestClass *x86_klass = ++ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; + +@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == 
KVM_X86_DEFAULT_VM) { ++ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +@@ -1441,10 +1448,8 @@ static void + sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); +- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_common_kvm_init; +- x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_common_get_sev_device, +@@ -1529,10 +1534,12 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +@@ -1770,8 +1777,10 @@ static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; ++ x86_klass->kvm_type = sev_snp_kvm_type; + + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch new file mode 100644 index 0000000..d194994 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch @@ -0,0 +1,84 @@ +From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:20 -0500 +Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm) + +Add a simple helper to check if the current guest type is SNP. Also have +SNP-enabled imply that SEV-ES is enabled as well, and fix up any places +where the sev_es_enabled() check is expecting a pure/non-SNP guest. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++++- + target/i386/sev.h | 2 ++ + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index a81b3228d4..4edfedc139 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -325,12 +325,21 @@ sev_enabled(void) + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + ++bool ++sev_snp_enabled(void) ++{ ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); ++} ++ + bool + sev_es_enabled(void) + { + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + +- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); ++ return sev_snp_enabled() || ++ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t +@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + "support", __func__); + goto err; + } ++ } + ++ if (sev_es_enabled() && !sev_snp_enabled()) { + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", +diff --git a/target/i386/sev.h b/target/i386/sev.h +index bedc667eeb..94295ee74f 100644 +--- a/target/i386/sev.h ++++ 
b/target/i386/sev.h +@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext { + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); ++bool sev_snp_enabled(void); + #else + #define sev_enabled() 0 + #define sev_es_enabled() 0 ++#define sev_snp_enabled() 0 + #endif + + uint32_t sev_get_cbit_position(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch new file mode 100644 index 0000000..2bab2ac --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch @@ -0,0 +1,187 @@ +From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:28 -0500 +Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest + launch data + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm) + +Process any queued up launch data and encrypt/measure it into the SNP +guest instance prior to initial guest launch. + +This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial +update responses. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- + target/i386/trace-events | 2 + + 2 files changed, 113 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e89b87d2f5..ef2e592ca7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -756,6 +756,76 @@ out: + return ret; + } + ++static const char * ++snp_page_type_to_str(int type) ++{ ++ switch (type) { ++ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; ++ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; ++ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; ++ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; ++ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; ++ default: return "unknown"; ++ } ++} ++ ++static int ++sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, ++ SevLaunchUpdateData *data) ++{ ++ int ret, fw_error; ++ struct kvm_sev_snp_launch_update update = {0}; ++ ++ if (!data->hva || !data->len) { ++ error_report("SNP_LAUNCH_UPDATE called with invalid address" ++ "/ length: %p / %lx", ++ data->hva, data->len); ++ return 1; ++ } ++ ++ update.uaddr = (__u64)(unsigned long)data->hva; ++ update.gfn_start = data->gpa >> TARGET_PAGE_BITS; ++ update.len = data->len; ++ update.type = data->type; ++ ++ /* ++ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private ++ * memory attribute set in advance. 
++ */ ++ ret = kvm_set_memory_attributes_private(data->gpa, data->len); ++ if (ret) { ++ error_report("SEV-SNP: failed to configure initial" ++ "private guest memory"); ++ goto out; ++ } ++ ++ while (update.len || ret == -EAGAIN) { ++ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << ++ TARGET_PAGE_BITS, update.len, ++ snp_page_type_to_str(update.type)); ++ ++ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ &update, &fw_error); ++ if (ret && ret != -EAGAIN) { ++ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ break; ++ } ++ } ++ ++out: ++ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { ++ error_report("SEV-SNP: expected update of GPA range %lx-%lx," ++ "got GPA range %lx-%llx", ++ data->gpa, data->gpa + data->len, data->gpa, ++ update.gfn_start << TARGET_PAGE_BITS); ++ ret = -EIO; ++ } ++ ++ return ret; ++} ++ + static int + sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { +@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static void ++sev_snp_launch_finish(SevCommonState *sev_common) ++{ ++ int ret, error; ++ Error *local_err = NULL; ++ SevLaunchUpdateData *data; ++ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; ++ ++ QTAILQ_FOREACH(data, &launch_update, next) { ++ ret = sev_snp_launch_update(sev_snp, data); ++ if (ret) { ++ exit(1); ++ } ++ } ++ ++ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, ++ sev_snp->host_data); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, ++ finish, &error); ++ if (ret) { ++ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", ++ ret, error, fw_error_to_str(error)); ++ exit(1); ++ } ++ ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); ++ ++ /* add migration 
blocker */ ++ error_setg(&sev_mig_blocker, ++ "SEV-SNP: Migration is not implemented"); ++ ret = migrate_add_blocker(&sev_mig_blocker, &local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ error_free(sev_mig_blocker); ++ exit(1); ++ } ++} ++ ++ + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_snp_launch_start; ++ klass->launch_finish = sev_snp_launch_finish; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +- + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index cb26d8a925..06b44ead2e 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" + kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" ++kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" ++kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch new file mode 100644 index 0000000..572dddc --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch @@ -0,0 +1,127 @@ +From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001 
+From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:41 -0500 +Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV + guest objects + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm) + +QEMU will currently automatically make use of the KVM_SEV_INIT2 API for +initializing SEV and SEV-ES guests verses the older +KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. + +However, the older interfaces will silently avoid sync'ing FPU/XSAVE +state to the VMSA prior to encryption, thus relying on behavior and +measurements that assume the related fields to be allow zero. + +With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in +measurements changes and, theoretically, behaviorial changes, though the +latter are unlikely to be seen in practice. + +To allow a smooth transition to the newer interface, while still +providing a mechanism to maintain backward compatibility with VMs +created using the older interfaces, provide a new command-line +parameter: + + -object sev-guest,legacy-vm-type=true,... + +and have it default to false. + +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-2-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 11 ++++++++++- + target/i386/sev.c | 18 +++++++++++++++++- + 2 files changed, 27 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 85e6b4f84a..38dde6d785 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -898,6 +898,14 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. 
++# The newer KVM_SEV_INIT2 interface syncs additional vCPU ++# state when initializing the VMSA structures, which will ++# result in a different guest measurement. Set this to ++# maintain compatibility with older QEMU or kernel versions ++# that rely on legacy KVM_SEV_INIT behavior. ++# (default: false) (since 9.1) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -908,7 +916,8 @@ + '*handle': 'uint32', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool' } } ++ '*kernel-hashes': 'bool', ++ '*legacy-vm-type': 'bool' } } + + ## + # @ThreadContextProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9dab4060b8..f4ee317cb0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -67,6 +67,7 @@ struct SevGuestState { + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; ++ bool legacy_vm_type; + + /* runtime state */ + uint32_t handle; +@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ + bool + sev_enabled(void) + { +@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type)) { ++ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; +@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); ++ object_class_property_add_bool(oc, "legacy-vm-type", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type); ++ object_class_property_set_description(oc, "legacy-vm-type", ++ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } + + static void +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch new file mode 100644 index 0000000..ca1338c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch @@ -0,0 +1,203 @@ +From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:21 -0500 +Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm) + +Some aspects of the init routine SEV are specific to SEV and not +applicable for SNP guests, so move the SEV-specific bits into +separate class method and retain only the common functionality. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++-------------- + 1 file changed, 51 insertions(+), 21 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 4edfedc139..5519de1c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -73,6 +73,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + + /** +@@ -882,7 +883,7 @@ out: + return sev_common->kvm_type; + } + +-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; +@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + +- ret = ram_block_discard_disable(true); +- if (ret) { +- error_report("%s: cannot disable RAM discard", __func__); +- return -1; +- } +- + sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); +@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev_common->cbitpos); +- goto err; ++ return -1; + } + + /* +@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, 
requested '%d'", + __func__, sev_common->reduced_phys_bits); +- goto err; ++ return -1; + } + + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); +@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +- goto err; ++ return -1; + } + g_free(devname); + +@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: failed to get platform status ret=%d " + "fw_error='%d: %s'", __func__, ret, fw_error, + fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; +@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (!kvm_kernel_irqchip_allowed()) { + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); +- goto err; ++ return -1; + } + } + +@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); +- goto err; ++ return -1; + } + } + +@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); +- goto err; ++ return -1; ++ } ++ ++ if (klass->kvm_init && klass->kvm_init(cgs, errp)) { ++ return -1; + } + +- ram_block_notifier_add(&sev_ram_notifier); +- qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; +-err: +- 
ram_block_discard_disable(false); +- return -1; ++} ++ ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ int ret; ++ ++ /* ++ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding ++ * isn't actually possible. With SNP, only guest_memfd pages are used ++ * for private guest memory, so discarding of shared memory is still ++ * possible.. ++ */ ++ ret = ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, "%s: cannot disable RAM discard", __func__); ++ return -1; ++ } ++ ++ /* ++ * SEV uses these notifiers to register/pin pages prior to guest use, ++ * but SNP relies on guest_memfd for private pages, which has its ++ * own internal mechanisms for registering/pinning private memory. ++ */ ++ ram_block_notifier_add(&sev_ram_notifier); ++ ++ /* ++ * The machine done notify event is used for SEV guests to get the ++ * measurement of the encrypted images. When SEV-SNP is enabled, the ++ * measurement is part of the guest attestation process where it can ++ * be collected without any reliance on the VMM. So skip registering ++ * the notifier for SNP in favor of using guest attestation instead. 
++ */ ++ qemu_add_machine_init_done_notifier(&sev_machine_done_notify); ++ ++ return 0; + } + + int +@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data) + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + +- klass->kvm_init = sev_kvm_init; ++ klass->kvm_init = sev_common_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch new file mode 100644 index 0000000..0db345c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch @@ -0,0 +1,94 @@ +From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:22 -0500 +Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm) + +SNP does not support SMM and requires guest_memfd for +private guest memory, so add SNP specific kvm_init() +functionality in snp_kvm_init() class method. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 5519de1c6b..6525b3c1a0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,12 +885,12 @@ out: + + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; +@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return 0; + } + ++static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ X86MachineState *x86ms = X86_MACHINE(ms); ++ ++ if (x86ms->smm == ON_OFF_AUTO_AUTO) { ++ x86ms->smm = ON_OFF_AUTO_OFF; ++ } else if (x86ms->smm == ON_OFF_AUTO_ON) { ++ error_setg(errp, "SEV-SNP does not support SMM."); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) + static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->kvm_init = sev_snp_kvm_init; ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass 
*oc, void *data) + static void + sev_snp_guest_instance_init(Object *obj) + { ++ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + ++ cgs->require_guest_memfd = true; ++ + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch new file mode 100644 index 0000000..c10f75f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch @@ -0,0 +1,262 @@ +From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:32 -0500 +Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm) + +SEV-SNP firmware allows a special guest page to be populated with a +table of guest CPUID values so that they can be validated through +firmware before being loaded into encrypted guest memory where they can +be used in place of hypervisor-provided values[1]. + +As part of SEV-SNP guest initialization, use this interface to validate +the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest +start and populate the CPUID page reserved by OVMF with the resulting +encrypted data. + +[1] SEV SNP Firmware ABI Specification, Rev. 
0.8, 8.13.2.6 + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 162 insertions(+), 2 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c57534fca2..06401f0526 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) + ++/* doesn't expose this, so re-use the max from kvm.c */ ++#define KVM_MAX_CPUID_ENTRIES 100 ++ ++typedef struct KvmCpuidInfo { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++} KvmCpuidInfo; ++ ++#define SNP_CPUID_FUNCTION_MAXCOUNT 64 ++#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF ++ ++typedef struct { ++ uint32_t eax_in; ++ uint32_t ecx_in; ++ uint64_t xcr0_in; ++ uint64_t xss_in; ++ uint32_t eax; ++ uint32_t ebx; ++ uint32_t ecx; ++ uint32_t edx; ++ uint64_t reserved; ++} __attribute__((packed)) SnpCpuidFunc; ++ ++typedef struct { ++ uint32_t count; ++ uint32_t reserved1; ++ uint64_t reserved2; ++ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; ++} __attribute__((packed)) SnpCpuidInfo; ++ + static int + sev_ioctl(int fd, int cmd, void *data, int *error) + { +@@ -788,6 +818,35 @@ out: + return ret; + } + ++static void ++sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, ++ SnpCpuidInfo *new) ++{ ++ size_t i; ++ ++ if (old->count != new->count) { ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ "provided: %d, expected: %d", old->count, new->count); ++ return; ++ } ++ ++ for (i = 0; i < old->count; i++) { ++ SnpCpuidFunc *old_func, *new_func; ++ ++ old_func = &old->entries[i]; ++ new_func = &new->entries[i]; ++ ++ if (memcmp(old_func, 
new_func, sizeof(SnpCpuidFunc))) { ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", ++ old_func->eax_in, old_func->ecx_in, ++ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, ++ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); ++ } ++ } ++} ++ + static const char * + snp_page_type_to_str(int type) + { +@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) + { + int ret, fw_error; ++ SnpCpuidInfo snp_cpuid_info; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { +@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + return 1; + } + ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ ++ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); ++ } ++ + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; +@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); ++ ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); ++ error_report("SEV-SNP: failed update CPUID page"); ++ } + break; + } + } +@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common) + } + + static int +-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++snp_launch_update_data(uint64_t gpa, void *hva, ++ uint32_t len, int type) + { + SevLaunchUpdateData *data; + +@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) + return 0; + } + ++static int ++sev_snp_cpuid_info_fill(SnpCpuidInfo 
*snp_cpuid_info, ++ const KvmCpuidInfo *kvm_cpuid_info) ++{ ++ size_t i; ++ ++ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { ++ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", ++ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); ++ return -1; ++ } ++ ++ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); ++ ++ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { ++ const struct kvm_cpuid_entry2 *kvm_cpuid_entry; ++ SnpCpuidFunc *snp_cpuid_entry; ++ ++ kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; ++ snp_cpuid_entry = &snp_cpuid_info->entries[i]; ++ ++ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; ++ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { ++ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; ++ } ++ snp_cpuid_entry->eax = kvm_cpuid_entry->eax; ++ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; ++ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; ++ snp_cpuid_entry->edx = kvm_cpuid_entry->edx; ++ ++ /* ++ * Guest kernels will calculate EBX themselves using the 0xD ++ * subfunctions corresponding to the individual XSAVE areas, so only ++ * encode the base XSAVE size in the initial leaves, corresponding ++ * to the initial XCR0=1 state. 
++ */ ++ if (snp_cpuid_entry->eax_in == 0xD && ++ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { ++ snp_cpuid_entry->ebx = 0x240; ++ snp_cpuid_entry->xcr0_in = 1; ++ snp_cpuid_entry->xss_in = 0; ++ } ++ } ++ ++ snp_cpuid_info->count = i; ++ ++ return 0; ++} ++ ++static int ++snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) ++{ ++ KvmCpuidInfo kvm_cpuid_info = {0}; ++ SnpCpuidInfo snp_cpuid_info; ++ CPUState *cs = first_cpu; ++ int ret; ++ uint32_t i = 0; ++ ++ assert(sizeof(snp_cpuid_info) <= cpuid_len); ++ ++ /* get the cpuid list from KVM */ ++ do { ++ kvm_cpuid_info.cpuid.nent = ++i; ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); ++ } while (ret == -E2BIG); ++ ++ if (ret) { ++ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", ++ strerror(-ret)); ++ return 1; ++ } ++ ++ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); ++ if (ret) { ++ error_report("SEV-SNP: failed to generate CPUID table information"); ++ return 1; ++ } ++ ++ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); ++ ++ return snp_launch_update_data(cpuid_addr, hva, cpuid_len, ++ KVM_SEV_SNP_PAGE_TYPE_CPUID); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + exit(1); + } + +- ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else { ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ } ++ + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch new file mode 100644 index 0000000..4691679 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch @@ -0,0 +1,127 @@ +From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:31 -0500 +Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata + pages + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm) + +OVMF reserves various pages so they can be pre-initialized/validated +prior to launching the guest. Add support for populating these pages +with the expected content. + +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 74 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 17281bb2c7..c57534fca2 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static int ++snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++{ ++ SevLaunchUpdateData *data; ++ ++ data = g_new0(SevLaunchUpdateData, 1); ++ data->gpa = gpa; ++ data->hva = hva; ++ data->len = len; ++ data->type = type; ++ ++ QTAILQ_INSERT_TAIL(&launch_update, data, next); ++ ++ return 0; ++} ++ ++static int ++snp_metadata_desc_to_page_type(int desc_type) ++{ ++ switch (desc_type) { ++ /* Add the umeasured prevalidated pages as a zero page */ ++ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ case 
SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; ++ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; ++ default: ++ return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ } ++} ++ ++static void ++snp_populate_metadata_pages(SevSnpGuestState *sev_snp, ++ OvmfSevMetadata *metadata) ++{ ++ OvmfSevMetadataDesc *desc; ++ int type, ret, i; ++ void *hva; ++ MemoryRegion *mr = NULL; ++ ++ for (i = 0; i < metadata->num_desc; i++) { ++ desc = &metadata->descs[i]; ++ ++ type = snp_metadata_desc_to_page_type(desc->type); ++ ++ hva = gpa2hva(&mr, desc->base, desc->len, NULL); ++ if (!hva) { ++ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", ++ __func__, desc->base, desc->len); ++ exit(1); ++ } ++ ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (ret) { ++ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", ++ __func__, desc->base, desc->len, desc->type); ++ exit(1); ++ } ++ } ++} ++ + static void + sev_snp_launch_finish(SevCommonState *sev_common) + { + int ret, error; + Error *local_err = NULL; ++ OvmfSevMetadata *metadata; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + ++ /* ++ * To boot the SNP guest, the hypervisor is required to populate the CPUID ++ * and Secrets page before finalizing the launch flow. The location of ++ * the secrets and CPUID page is available through the OVMF metadata GUID. 
++ */ ++ metadata = pc_system_get_ovmf_sev_metadata_ptr(); ++ if (metadata == NULL) { ++ error_report("%s: Failed to locate SEV metadata header", __func__); ++ exit(1); ++ } ++ ++ /* Populate all the metadata pages */ ++ snp_populate_metadata_pages(sev_snp, metadata); ++ + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch new file mode 100644 index 0000000..5da793f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch @@ -0,0 +1,122 @@ +From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:27 -0500 +Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm) + +The SNP_LAUNCH_START is called first to create a cryptographic launch +context within the firmware. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++ + target/i386/trace-events | 1 + + 2 files changed, 40 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 43d1c48bd9..e89b87d2f5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -39,6 +39,7 @@ + #include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" ++#include "qemu/queue.h" + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +@@ -115,6 +116,16 @@ struct SevSnpGuestState { + #define DEFAULT_SEV_DEVICE "/dev/sev" + #define DEFAULT_SEV_SNP_POLICY 0x30000 + ++typedef struct SevLaunchUpdateData { ++ QTAILQ_ENTRY(SevLaunchUpdateData) next; ++ hwaddr gpa; ++ void *hva; ++ uint64_t len; ++ int type; ++} SevLaunchUpdateData; ++ ++static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; ++ + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ +@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + return 0; + } + ++static int ++sev_snp_launch_start(SevCommonState *sev_common) ++{ ++ int fw_error, rc; ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ ++ trace_kvm_sev_snp_launch_start(start->policy, ++ sev_snp_guest->guest_visible_workarounds); ++ ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, ++ start, &fw_error); ++ if (rc < 0) { ++ error_report("%s: SNP_LAUNCH_START ret=%d 
fw_error=%d '%s'", ++ __func__, rc, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ QTAILQ_INIT(&launch_update); ++ ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ ++ return 0; ++} ++ + static int + sev_launch_start(SevCommonState *sev_common) + { +@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + ret = klass->launch_start(sev_common); ++ + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; +@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->launch_start = sev_snp_launch_start; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 2cd8726eeb..cb26d8a925 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" ++kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch new file mode 100644 index 0000000..f809242 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch @@ -0,0 +1,237 @@ +From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 
May 2024 06:16:35 -0500 +Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm) + +In SNP, the hashes page is designated with a specific metadata entry +published in AmdSev OVMF. + +Therefore, if the user enabled kernel hashes (for measured direct boot), +QEMU should prepare the content of the hashes table, and during the +processing of the metadata entry it should copy the content into the designated +page and encrypt it. + +Note that in SNP (unlike SEV and SEV-ES) the measurement is done in +whole 4KB pages. Therefore QEMU zeros the whole page that includes the +hashes table, and fills in the kernel hashes area in that page, and then +encrypts the whole page. The rest of the page is reserved for SEV +launch secrets which are not usable anyway on SNP. + +If the user disabled kernel hashes, QEMU pre-validates the kernel hashes +page as a zero page. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8) +Signed-off-by: Paolo Bonzini +--- + include/hw/i386/pc.h | 2 + + target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++----------- + 2 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 94b49310f5..ee3bfb7be9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -175,6 +175,8 @@ typedef enum { + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, ++ /* The section contains the region for kernel hashes for measured direct boot */ ++ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, + + } ovmf_sev_metadata_desc_type; + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 3fce4c08eb..004c667ac1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -115,6 +115,10 @@ struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + + /* public */ ++ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp); + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); +@@ -154,6 +158,9 @@ struct SevSnpGuestState { + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; ++ ++ uint32_t kernel_hashes_offset; ++ PaddedSevHashTable *kernel_hashes_data; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) + KVM_SEV_SNP_PAGE_TYPE_CPUID); + } + ++static int 
++snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, ++ void *hva, uint32_t len) ++{ ++ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ if (sev_snp->parent_obj.kernel_hashes) { ++ assert(sev_snp->kernel_hashes_data); ++ assert((sev_snp->kernel_hashes_offset + ++ sizeof(*sev_snp->kernel_hashes_data)) <= len); ++ memset(hva, 0, len); ++ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, ++ sizeof(*sev_snp->kernel_hashes_data)); ++ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; ++ } ++ return snp_launch_update_data(addr, hva, len, type); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ++ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, ++ desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } +@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + return true; + } + ++static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ /* ++ * SNP: Populate the hashes table in an area that later in ++ * snp_launch_update_kernel_hashes() will be copied to the guest memory ++ * and encrypted. 
++ */ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; ++ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); ++ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); ++} ++ ++static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { ++ ret = false; ++ } ++ ++ address_space_unmap(&address_space_memory, padded_ht, ++ mapped_len, true, mapped_len); ++ ++ return ret; ++} ++ + /* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. 
+@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + { + uint8_t *data; + SevHashTableDescriptor *area; +- PaddedSevHashTable *padded_ht; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly +@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return false; + } + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } +- +- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { +- ret = false; +- } +- } else { +- ret = false; +- } +- +- address_space_unmap(&address_space_memory, padded_ht, +- mapped_len, true, mapped_len); +- +- return ret; ++ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); + } + + static char * +@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; +@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass 
= X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch new file mode 100644 index 0000000..aacb0da --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch @@ -0,0 +1,268 @@ +From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Jul 2024 23:10:05 -0500 +Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy + KVM_SEV*_INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm) + +Currently if the 'legacy-vm-type' property of the sev-guest object is +'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel +interface in conjunction with the newer KVM_X86_SEV_VM and +KVM_X86_SEV_ES_VM KVM VM types. + +This can lead to measurement changes if, for instance, an SEV guest was +created on a host that originally had an older kernel that didn't +support KVM_SEV_INIT2, but is booted on the same host later on after the +host kernel was upgraded. + +Instead, if legacy-vm-type is 'off', QEMU should fail if the +KVM_SEV_INIT2 interface is not provided by the current host kernel. +Modify the fallback handling accordingly. + +In the future, VMSA features and other flags might be added to QEMU +which will require legacy-vm-type to be 'off' because they will rely +on the newer KVM_SEV_INIT2 interface. 
It may be difficult to convey to +users what values of legacy-vm-type are compatible with which +features/options, so as part of this rework, switch legacy-vm-type to a +tri-state OnOffAuto option. 'auto' in this case will automatically +switch to using the newer KVM_SEV_INIT2, but only if it is required to +make use of new VMSA features or other options only available via +KVM_SEV_INIT2. + +Defining 'auto' in this way would avoid inadvertantly breaking +compatibility with older kernels since it would only be used in cases +where users opt into newer features that are only available via +KVM_SEV_INIT2 and newer kernels, and provide better default behavior +than the legacy-vm-type=off behavior that was previously in place, so +make it the default for 9.1+ machine types. + +Cc: Daniel P. Berrangé +Cc: Paolo Bonzini +cc: kvm@vger.kernel.org +Signed-off-by: Michael Roth +Reviewed-by: Daniel P. Berrangé +Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd) + +RHEL: adjust compatiility setting, applying it to 9.4 machine type +--- + hw/i386/pc.c | 2 +- + qapi/qom.json | 18 ++++++---- + target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++-------- + 3 files changed, 83 insertions(+), 22 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b25d075b59..e9c5ea5d8f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +- { "sev-guest", "legacy-vm-type", "true" }, ++ { "sev-guest", "legacy-vm-type", "on" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/qapi/qom.json b/qapi/qom.json +index 8bd299265e..17bd5a0cf7 100644 +--- a/qapi/qom.json ++++ 
b/qapi/qom.json +@@ -912,12 +912,16 @@ + # @handle: SEV firmware handle (default: 0) + # + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +-# The newer KVM_SEV_INIT2 interface syncs additional vCPU +-# state when initializing the VMSA structures, which will +-# result in a different guest measurement. Set this to +-# maintain compatibility with older QEMU or kernel versions +-# that rely on legacy KVM_SEV_INIT behavior. +-# (default: false) (since 9.1) ++# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs ++# additional vCPU state when initializing the VMSA structures, ++# which will result in a different guest measurement. Set ++# this to 'on' to force compatibility with older QEMU or kernel ++# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' ++# will behave identically to 'on', but will automatically ++# switch to using KVM_SEV_INIT2 if the user specifies any ++# additional options that require it. If set to 'off', QEMU ++# will require KVM_SEV_INIT2 unconditionally. ++# (default: off) (since 9.1) + # + # Since: 2.12 + ## +@@ -927,7 +931,7 @@ + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*legacy-vm-type': 'bool' } } ++ '*legacy-vm-type': 'OnOffAuto' } } + + ## + # @SevSnpGuestProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 491fab74fd..b921defb63 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -144,7 +144,7 @@ struct SevGuestState { + uint32_t policy; + char *dh_cert_file; + char *session_file; +- bool legacy_vm_type; ++ OnOffAuto legacy_vm_type; + }; + + struct SevSnpGuestState { +@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++/* ++ * This helper is to examine sev-guest properties and determine if any options ++ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated ++ * KVM VM types. 
++ */ ++static bool sev_init2_required(SevGuestState *sev_guest) ++{ ++ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ ++ return false; ++} ++ + static int sev_kvm_type(X86ConfidentialGuest *cg) + { + SevCommonState *sev_common = SEV_COMMON(cg); +@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + goto out; + } + ++ /* These are the only cases where legacy VM types can be used. */ ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || ++ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && ++ !sev_init2_required(sev_guest))) { ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ goto out; ++ } ++ ++ /* ++ * Newer VM types are required, either explicitly via legacy-vm-type=on, or ++ * implicitly via legacy-vm-type=auto along with additional sev-guest ++ * properties that require the newer VM types. ++ */ + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { +- sev_common->kvm_type = kvm_type; +- } else { +- sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { ++ error_report("SEV: host kernel does not support requested %s VM type, which is required " ++ "for the set of options specified. To allow use of the legacy " ++ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " ++ "compatible with the legacy VM type, or upgrade your kernel.", ++ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } else { ++ error_report("SEV: host kernel does not support requested %s VM type. To allow use of " ++ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " ++ "must be set to 'on' or 'auto' for the sev-guest object.", ++ kvm_type == KVM_X86_SEV_VM ? 
"KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } ++ ++ return -1; + } + ++ sev_common->kvm_type = kvm_type; + out: + return sev_common->kvm_type; + } +@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { ++ case KVM_X86_DEFAULT_VM: + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +- } else { ++ break; ++ case KVM_X86_SEV_VM: ++ case KVM_X86_SEV_ES_VM: ++ case KVM_X86_SNP_VM: { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ break; ++ } ++ default: ++ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", ++ __func__); ++ return -1; + } + + if (ret) { +@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp) + SEV_GUEST(obj)->session_file = g_strdup(value); + } + +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- return SEV_GUEST(obj)->legacy_vm_type; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; ++ ++ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); + } + +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- SEV_GUEST(obj)->legacy_vm_type = value; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); + } + + static void +@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data) + 
sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "legacy-vm-type", +- sev_guest_get_legacy_vm_type, +- sev_guest_set_legacy_vm_type); ++ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type, NULL, NULL); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } +@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); ++ ++ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; + } + + /* guest info specific sev/sev-es */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch new file mode 100644 index 0000000..739a145 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch @@ -0,0 +1,46 @@ +From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:24 -0500 +Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for + SEV-SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm) + +For SEV-SNP guests, launch measurement is queried from within the guest +during attestation, so don't attempt to return it as part of +query-sev-launch-measure. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6525b3c1a0..c3daaf1ad5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + + static char *sev_get_launch_measurement(void) + { +- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + + if (sev_guest && + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch new file mode 100644 index 0000000..e438cd3 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch @@ -0,0 +1,54 @@ +From 0612c7ed587422ec7e07c27c8ca11b89c7aa8b02 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:43 -0500 +Subject: [PATCH 077/100] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP + guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [77/91] 3c494eb54499c24121cc2c47045626478b8bb41e (bonzini/rhel-qemu-kvm) + +KVM will forward GHCB page-state change requests to userspace in the +form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is +enabled for SNP guests. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index eaf5fc6c6b..abb63062ac 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + + #include ++#include + #include + + #include +@@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common) + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + ++ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { ++ return 1; ++ } ++ + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch new file mode 100644 index 0000000..a06301d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch @@ -0,0 +1,167 @@ +From eed17520567c202f53ab767bfd42cfe303838772 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:33 -0500 +Subject: [PATCH 078/100] i386/sev: Extract build_kernel_loader_hashes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [78/91] 291ea10e774178826d1afd38fc8292d67c5fd42d (bonzini/rhel-qemu-kvm) + +Extract the building of the kernel hashes table out from +sev_add_kernel_loader_hashes() to allow building it in +other memory areas (for SNP support). + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 06cbd66cecaa3230cccb330facac241a677b29d5) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 102 ++++++++++++++++++++++++++-------------------- + 1 file changed, 58 insertions(+), 44 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index abb63062ac..73f9406715 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = { + 0x4d, 0x36, 0xab, 0x2a) + }; + +-/* +- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page +- * which is included in SEV's initial memory measurement. +- */ +-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, ++ SevKernelLoaderContext *ctx, ++ Error **errp) + { +- uint8_t *data; +- SevHashTableDescriptor *area; + SevHashTable *ht; +- PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; +- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- +- /* +- * Only add the kernel hashes if the sev-guest configuration explicitly +- * stated kernel-hashes=on. 
+- */ +- if (!sev_common->kernel_hashes) { +- return false; +- } +- +- if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { +- error_setg(errp, "SEV: kernel specified but guest firmware " +- "has no hashes table GUID"); +- return false; +- } +- area = (SevHashTableDescriptor *)data; +- if (!area->base || area->size < sizeof(PaddedSevHashTable)) { +- error_setg(errp, "SEV: guest firmware hashes table area is invalid " +- "(base=0x%x size=0x%x)", area->base, area->size); +- return false; +- } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If +@@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + } + assert(hash_len == HASH_SIZE); + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; +@@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { ++ return true; ++} ++ ++/* ++ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page ++ * which is included in SEV's initial memory measurement. 
++ */ ++bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++{ ++ uint8_t *data; ++ SevHashTableDescriptor *area; ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ /* ++ * Only add the kernel hashes if the sev-guest configuration explicitly ++ * stated kernel-hashes=on. ++ */ ++ if (!sev_common->kernel_hashes) { ++ return false; ++ } ++ ++ if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { ++ error_setg(errp, "SEV: kernel specified but guest firmware " ++ "has no hashes table GUID"); ++ return false; ++ } ++ ++ area = (SevHashTableDescriptor *)data; ++ if (!area->base || area->size < sizeof(PaddedSevHashTable)) { ++ error_setg(errp, "SEV: guest firmware hashes table area is invalid " ++ "(base=0x%x size=0x%x)", area->base, area->size); ++ return false; ++ } ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { + ret = false; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch new file mode 100644 index 0000000..1d30674 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch @@ -0,0 +1,65 @@ +From a9530c89225fce9e381929c4cd8e372068827acf Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 
2024 10:52:49 +0200 +Subject: [PATCH 089/100] i386/sev: Fallback to the default SEV device if none + provided in sev_get_capabilities() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [89/91] 22318c20d7102815f754cec0efaf383e05ef79c1 (bonzini/rhel-qemu-kvm) + +When management tools (e.g. libvirt) query QEMU capabilities, +they start QEMU with a minimalistic configuration and issue +various commands on monitor. One of the command issued is/might +be "query-sev-capabilities" to learn values like cbitpos or +reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor +command returns an error instead. + +This creates a chicken-egg problem because in order to query +those aforementioned values QEMU needs to be started with a +'sev-guest' object. But to start QEMU with the values must be +known. + +I think it's safe to assume that the default path ("/dev/sev") +provides the same data as user provided one. So fall back to it. 
+ +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3fb24530b2bb1346a44e17becefc9865b40a2257) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 53b7f7315b..491fab74fd 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -585,13 +585,13 @@ static SevCapability *sev_get_capabilities(Error **errp) + } + + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- if (!sev_common) { +- error_setg(errp, "SEV is not configured"); +- return NULL; ++ if (sev_common) { ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ } else { ++ sev_device = g_strdup(DEFAULT_SEV_DEVICE); + } + +- sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +- &error_abort); + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch new file mode 100644 index 0000000..b23e008 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch @@ -0,0 +1,48 @@ +From b672cdf8c10a530b5bcf6dd4489632891eb2c731 Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 2024 10:52:48 +0200 +Subject: [PATCH 088/100] i386/sev: Fix error message in sev_get_capabilities() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [88/91] 
ff8a8b27af02e565172ffe39d0571c234317713d (bonzini/rhel-qemu-kvm) + +When a custom path is provided to sev-guest object and opening +the path fails an error message is reported. But the error +message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of +the custom path. + +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit e306ae87e0ef04bc7a5dec6db693f6ea09d64d45) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 37de80adc7..53b7f7315b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -595,7 +595,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +- DEFAULT_SEV_DEVICE); ++ sev_device); + g_free(sev_device); + return NULL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch new file mode 100644 index 0000000..2d167af --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch @@ -0,0 +1,1118 @@ +From e6cf2115eb9db545821180b8a978cdccc6a2c2db Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:16 -0500 +Subject: [PATCH 056/100] i386/sev: Introduce "sev-common" type to encapsulate + common SEV state + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [56/91] b52d5c9c5e4997d2fd791fa36dd5d4c836dfc32f (bonzini/rhel-qemu-kvm) + +Currently all SEV/SEV-ES functionality is managed through a 
single +'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this +same approach won't work well since some of the properties/state +managed by 'sev-guest' is not applicable to SEV-SNP, which will instead +rely on a new QOM type with its own set of properties/state. + +To prepare for this, this patch moves common state into an abstract +'sev-common' parent type to encapsulate properties/state that are +common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific +properties/state in the current 'sev-guest' type. This should not +affect current behavior or command-line options. + +As part of this patch, some related changes are also made: + + - a static 'sev_guest' variable is currently used to keep track of + the 'sev-guest' instance. SEV-SNP would similarly introduce an + 'sev_snp_guest' static variable. But these instances are now + available via qdev_get_machine()->cgs, so switch to using that + instead and drop the static variable. + + - 'sev_guest' is currently used as the name for the static variable + holding a pointer to the 'sev-guest' instance. Re-purpose the name + as a local variable referring the 'sev-guest' instance, and use + that consistently throughout the code so it can be easily + distinguished from sev-common/sev-snp-guest instances. + + - 'sev' is generally used as the name for local variables holding a + pointer to the 'sev-guest' instance. In cases where that now points + to common state, use the name 'sev_common'; in cases where that now + points to state specific to 'sev-guest' instance, use the name + 'sev_guest' + +In order to enable kernel-hashes for SNP, pull it from +SevGuestProperties to its parent SevCommonProperties so +it will be available for both SEV and SNP. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Dov Murik +Signed-off-by: Dov Murik +Acked-by: Markus Armbruster (QAPI schema) +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 16dcf200dc951c1cde3e5b442457db5f690b8cf0) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 40 ++-- + target/i386/sev.c | 489 ++++++++++++++++++++++++++-------------------- + target/i386/sev.h | 3 + + 3 files changed, 301 insertions(+), 231 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 38dde6d785..056b38f491 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -875,20 +875,12 @@ + 'data': { '*filename': 'str' } } + + ## +-# @SevGuestProperties: ++# @SevCommonProperties: + # +-# Properties for sev-guest objects. ++# Properties common to objects that are derivatives of sev-common. + # + # @sev-device: SEV device to use (default: "/dev/sev") + # +-# @dh-cert-file: guest owners DH certificate (encoded with base64) +-# +-# @session-file: guest owners session parameters (encoded with base64) +-# +-# @policy: SEV policy value (default: 0x1) +-# +-# @handle: SEV firmware handle (default: 0) +-# + # @cbitpos: C-bit location in page table entry (default: 0) + # + # @reduced-phys-bits: number of bits in physical addresses that become +@@ -898,6 +890,27 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# Since: 9.1 ++## ++{ 'struct': 'SevCommonProperties', ++ 'data': { '*sev-device': 'str', ++ '*cbitpos': 'uint32', ++ 'reduced-phys-bits': 'uint32', ++ '*kernel-hashes': 'bool' } } ++ ++## ++# @SevGuestProperties: ++# ++# Properties for sev-guest objects. 
++# ++# @dh-cert-file: guest owners DH certificate (encoded with base64) ++# ++# @session-file: guest owners session parameters (encoded with base64) ++# ++# @policy: SEV policy value (default: 0x1) ++# ++# @handle: SEV firmware handle (default: 0) ++# + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. + # The newer KVM_SEV_INIT2 interface syncs additional vCPU + # state when initializing the VMSA structures, which will +@@ -909,14 +922,11 @@ + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +- 'data': { '*sev-device': 'str', +- '*dh-cert-file': 'str', ++ 'base': 'SevCommonProperties', ++ 'data': { '*dh-cert-file': 'str', + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*cbitpos': 'uint32', +- 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool', + '*legacy-vm-type': 'bool' } } + + ## +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 67ed32e5ea..33e606eea0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -40,49 +40,59 @@ + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +-#define TYPE_SEV_GUEST "sev-guest" +-OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) ++OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + +- +-/** +- * SevGuestState: +- * +- * The SevGuestState object is used for creating and managing a SEV +- * guest. 
+- * +- * # $QEMU \ +- * -object sev-guest,id=sev0 \ +- * -machine ...,memory-encryption=sev0 +- */ +-struct SevGuestState { ++struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; +- uint32_t policy; +- char *dh_cert_file; +- char *session_file; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; +- bool legacy_vm_type; + + /* runtime state */ +- uint32_t handle; + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; +- gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; + }; + ++struct SevCommonStateClass { ++ X86ConfidentialGuestClass parent_class; ++ ++}; ++ ++/** ++ * SevGuestState: ++ * ++ * The SevGuestState object is used for creating and managing a SEV ++ * guest. ++ * ++ * # $QEMU \ ++ * -object sev-guest,id=sev0 \ ++ * -machine ...,memory-encryption=sev0 ++ */ ++struct SevGuestState { ++ SevCommonState parent_obj; ++ gchar *measurement; ++ ++ /* configuration parameters */ ++ uint32_t handle; ++ uint32_t policy; ++ char *dh_cert_file; ++ char *session_file; ++ bool legacy_vm_type; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" + +@@ -128,7 +138,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { + + QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +-static SevGuestState *sev_guest; + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +@@ -209,21 +218,21 @@ fw_error_to_str(int code) + } + + static bool +-sev_check_state(const SevGuestState *sev, SevState state) ++sev_check_state(const SevCommonState *sev_common, SevState state) + { +- assert(sev); +- return sev->state == state ? true : false; ++ assert(sev_common); ++ return sev_common->state == state ? 
true : false; + } + + static void +-sev_set_guest_state(SevGuestState *sev, SevState new_state) ++sev_set_guest_state(SevCommonState *sev_common, SevState new_state) + { + assert(new_state < SEV_STATE__MAX); +- assert(sev); ++ assert(sev_common); + +- trace_kvm_sev_change_state(SevState_str(sev->state), ++ trace_kvm_sev_change_state(SevState_str(sev_common->state), + SevState_str(new_state)); +- sev->state = new_state; ++ sev_common->state = new_state; + } + + static void +@@ -290,121 +299,61 @@ static struct RAMBlockNotifier sev_ram_notifier = { + .ram_block_removed = sev_ram_block_removed, + }; + +-static void +-sev_guest_finalize(Object *obj) +-{ +-} +- +-static char * +-sev_guest_get_session_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return s->session_file ? g_strdup(s->session_file) : NULL; +-} +- +-static void +-sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->session_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_dh_cert_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return g_strdup(s->dh_cert_file); +-} +- +-static void +-sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->dh_cert_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_sev_device(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return g_strdup(sev->sev_device); +-} +- +-static void +-sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(value); +-} +- +-static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return sev->kernel_hashes; +-} +- +-static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- 
sev->kernel_hashes = value; +-} +- +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +-{ +- return SEV_GUEST(obj)->legacy_vm_type; +-} +- +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +-{ +- SEV_GUEST(obj)->legacy_vm_type = value; +-} +- + bool + sev_enabled(void) + { +- return !!sev_guest; ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + + bool + sev_es_enabled(void) + { +- return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t + sev_get_cbit_position(void) + { +- return sev_guest ? sev_guest->cbitpos : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? sev_common->cbitpos : 0; + } + + uint32_t + sev_get_reduced_phys_bits(void) + { +- return sev_guest ? sev_guest->reduced_phys_bits : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? 
sev_common->reduced_phys_bits : 0; + } + + static SevInfo *sev_get_info(void) + { + SevInfo *info; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), ++ TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- info->api_major = sev_guest->api_major; +- info->api_minor = sev_guest->api_minor; +- info->build_id = sev_guest->build_id; +- info->policy = sev_guest->policy; +- info->state = sev_guest->state; +- info->handle = sev_guest->handle; ++ if (sev_guest) { ++ info->handle = sev_guest->handle; ++ } ++ info->api_major = sev_common->api_major; ++ info->api_minor = sev_common->api_minor; ++ info->build_id = sev_common->build_id; ++ info->state = sev_common->state; ++ /* we only report the lower 32-bits of policy for SNP, ok for now... */ ++ info->policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); + } + + return info; +@@ -530,6 +479,8 @@ static SevCapability *sev_get_capabilities(Error **errp) + size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; + uint32_t ebx; + int fd; ++ SevCommonState *sev_common; ++ char *sev_device; + + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); +@@ -540,12 +491,21 @@ static SevCapability *sev_get_capabilities(Error **errp) + return NULL; + } + +- fd = open(DEFAULT_SEV_DEVICE, O_RDWR); ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ if (!sev_common) { ++ error_setg(errp, "SEV is not configured"); ++ } ++ ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", + DEFAULT_SEV_DEVICE); ++ g_free(sev_device); + return NULL; + } ++ g_free(sev_device); + + if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, + &cert_chain_data, &cert_chain_len, errp)) { +@@ -588,7 +548,7 @@ 
static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + { + struct kvm_sev_attestation_report input = {}; + SevAttestationReport *report = NULL; +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common; + g_autofree guchar *data = NULL; + g_autofree guchar *buf = NULL; + gsize len; +@@ -613,8 +573,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + return NULL; + } + ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ + /* Query the report length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret < 0) { + if (err != SEV_RET_INVALID_LEN) { +@@ -630,7 +592,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + memcpy(input.mnonce, buf, sizeof(input.mnonce)); + + /* Query the report */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret) { + error_setg_errno(errp, errno, "SEV: Failed to get attestation report" +@@ -670,26 +632,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev) ++sev_launch_start(SevGuestState *sev_guest) + { + gsize sz; + int ret = 1; + int fw_error, rc; + struct kvm_sev_launch_start start = { +- .handle = sev->handle, .policy = sev->policy ++ .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; ++ SevCommonState *sev_common = SEV_COMMON(sev_guest); + +- if (sev->session_file) { +- if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { ++ if (sev_guest->session_file) { ++ if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { + goto out; + } + start.session_uaddr = (unsigned long)session; + start.session_len = sz; + } + +- if (sev->dh_cert_file) { +- if 
(sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { ++ if (sev_guest->dh_cert_file) { ++ if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { + goto out; + } + start.dh_uaddr = (unsigned long)dh_cert; +@@ -697,15 +660,15 @@ sev_launch_start(SevGuestState *sev) + } + + trace_kvm_sev_launch_start(start.policy, session, dh_cert); +- rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + if (rc < 0) { + error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + goto out; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); +- sev->handle = start.handle; ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ sev_guest->handle = start.handle; + ret = 0; + + out: +@@ -715,7 +678,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -727,7 +690,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -738,11 +701,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + } + + static int +-sev_launch_update_vmsa(SevGuestState *sev) ++sev_launch_update_vmsa(SevGuestState *sev_guest) + { + int ret, fw_error; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, ++ NULL, &fw_error); + if (ret) { + 
error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -754,18 +718,19 @@ sev_launch_update_vmsa(SevGuestState *sev) + static void + sev_launch_get_measure(Notifier *notifier, void *unused) + { +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int ret, error; + g_autofree guchar *data = NULL; + struct kvm_sev_launch_measure measurement = {}; + +- if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + return; + } + + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ +- ret = sev_launch_update_vmsa(sev); ++ ret = sev_launch_update_vmsa(sev_guest); + if (ret) { + exit(1); + } +@@ -773,7 +738,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + } + + /* query the measurement blob length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (!measurement.len) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -785,7 +750,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + measurement.uaddr = (unsigned long)data; + + /* get the measurement blob */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (ret) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -793,17 +758,19 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + return; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); + + /* encode the measurement value and emit the event */ +- sev->measurement = g_base64_encode(data, measurement.len); +- trace_kvm_sev_launch_measurement(sev->measurement); ++ 
sev_guest->measurement = g_base64_encode(data, measurement.len); ++ trace_kvm_sev_launch_measurement(sev_guest->measurement); + } + + static char *sev_get_launch_measurement(void) + { ++ SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ + if (sev_guest && +- sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { ++ SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { + return g_strdup(sev_guest->measurement); + } + +@@ -832,19 +799,20 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev) ++sev_launch_finish(SevGuestState *sev_guest) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", + __func__, ret, error, fw_error_to_str(error)); + exit(1); + } + +- sev_set_guest_state(sev, SEV_STATE_RUNNING); ++ sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -855,38 +823,40 @@ sev_launch_finish(SevGuestState *sev) + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +- SevGuestState *sev = opaque; ++ SevCommonState *sev_common = opaque; + + if (running) { +- if (!sev_check_state(sev, SEV_STATE_RUNNING)) { +- sev_launch_finish(sev); ++ if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { ++ sev_launch_finish(SEV_GUEST(sev_common)); + } + } + } + + static int sev_kvm_type(X86ConfidentialGuest *cg) + { +- SevGuestState *sev = SEV_GUEST(cg); ++ SevCommonState *sev_common = SEV_COMMON(cg); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int kvm_type; + +- if (sev->kvm_type != -1) { ++ if (sev_common->kvm_type != -1) { + goto out; + } + +- kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { +- sev->kvm_type = kvm_type; ++ kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? ++ KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { ++ sev_common->kvm_type = kvm_type; + } else { +- sev->kvm_type = KVM_X86_DEFAULT_VM; ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; + } + + out: +- return sev->kvm_type; ++ return sev_common->kvm_type; + } + + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev = SEV_GUEST(cgs); ++ SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; +@@ -899,8 +869,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + +- sev_guest = sev; +- sev->state = SEV_STATE_UNINIT; ++ sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; +@@ -910,9 +879,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * register of CPUID 0x8000001F. No need to verify the range as the + * comparison against the host value accomplishes that. + */ +- if (host_cbitpos != sev->cbitpos) { ++ if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", +- __func__, host_cbitpos, sev->cbitpos); ++ __func__, host_cbitpos, sev_common->cbitpos); + goto err; + } + +@@ -921,16 +890,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. 
+ */ +- if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ if (sev_common->reduced_phys_bits < 1 || ++ sev_common->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", +- __func__, sev->reduced_phys_bits); ++ __func__, sev_common->reduced_phys_bits); + goto err; + } + +- devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); +- sev->sev_fd = open(devname, O_RDWR); +- if (sev->sev_fd < 0) { ++ devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); ++ sev_common->sev_fd = open(devname, O_RDWR); ++ if (sev_common->sev_fd < 0) { + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +@@ -938,7 +908,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + g_free(devname); + +- ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, ++ ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, + &fw_error); + if (ret) { + error_setg(errp, "%s: failed to get platform status ret=%d " +@@ -946,9 +916,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + fw_error_to_str(fw_error)); + goto err; + } +- sev->build_id = status.build; +- sev->api_major = status.api_major; +- sev->api_minor = status.api_minor; ++ sev_common->build_id = status.build; ++ sev_common->api_major = status.api_major; ++ sev_common->api_minor = status.api_minor; + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +@@ -966,14 +936,14 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + + if (ret) { +@@ -982,7 +952,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- ret = sev_launch_start(sev); ++ sev_launch_start(SEV_GUEST(sev_common)); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -990,13 +960,12 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + ram_block_notifier_add(&sev_ram_notifier); + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); +- qemu_add_vm_change_state_handler(sev_vm_state_change, sev); ++ qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; + err: +- sev_guest = NULL; + ram_block_discard_disable(false); + return -1; + } +@@ -1004,13 +973,15 @@ err: + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +- if (!sev_guest) { ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ if (!sev_common) { + return 0; + } + + /* if SEV is in update state then encrypt the data else do nothing */ +- if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(sev_guest, ptr, len); ++ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { ++ int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1030,16 +1001,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + void *hva; + gsize hdr_sz = 0, data_sz = 0; + MemoryRegion *mr = NULL; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); 
+ +- if (!sev_guest) { ++ if (!sev_common) { + error_setg(errp, "SEV not enabled for guest"); + return 1; + } + + /* secret can be injected only in this state */ +- if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { + error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", +- sev_guest->state); ++ sev_common->state); + return 1; + } + +@@ -1073,7 +1045,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, + input.trans_uaddr, input.trans_len); + +- ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, + &input, &error); + if (ret) { + error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", +@@ -1180,9 +1152,10 @@ void sev_es_set_reset_vector(CPUState *cpu) + { + X86CPU *x86; + CPUX86State *env; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* Only update if we have valid reset information */ +- if (!sev_guest || !sev_guest->reset_data_valid) { ++ if (!sev_common || !sev_common->reset_data_valid) { + return; + } + +@@ -1194,11 +1167,11 @@ void sev_es_set_reset_vector(CPUState *cpu) + x86 = X86_CPU(cpu); + env = &x86->env; + +- cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, ++ cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + +- env->eip = sev_guest->reset_ip; ++ env->eip = sev_common->reset_ip; + } + + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +@@ -1206,6 +1179,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + CPUState *cpu; + uint32_t addr; + int ret; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_es_enabled()) { + return 0; +@@ -1219,9 +1193,9 @@ int sev_es_save_reset_vector(void 
*flash_ptr, uint64_t flash_size) + } + + if (addr) { +- sev_guest->reset_cs = addr & 0xffff0000; +- sev_guest->reset_ip = addr & 0x0000ffff; +- sev_guest->reset_data_valid = true; ++ sev_common->reset_cs = addr & 0xffff0000; ++ sev_common->reset_ip = addr & 0x0000ffff; ++ sev_common->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); +@@ -1267,12 +1241,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ +- if (!sev_guest->kernel_hashes) { ++ if (!sev_common->kernel_hashes) { + return false; + } + +@@ -1363,8 +1338,30 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static char * ++sev_common_get_sev_device(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_COMMON(obj)->sev_device); ++} ++ + static void +-sev_guest_class_init(ObjectClass *oc, void *data) ++sev_common_set_sev_device(Object *obj, const char *value, Error **errp) ++{ ++ SEV_COMMON(obj)->sev_device = g_strdup(value); ++} ++ ++static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) ++{ ++ return SEV_COMMON(obj)->kernel_hashes; ++} ++ ++static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) ++{ ++ SEV_COMMON(obj)->kernel_hashes = value; ++} ++ ++static void ++sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); +@@ -1373,10 +1370,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- 
sev_guest_set_sev_device); ++ sev_common_get_sev_device, ++ sev_common_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_common_get_kernel_hashes, ++ sev_common_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_common_instance_init(Object *obj) ++{ ++ SevCommonState *sev_common = SEV_COMMON(obj); ++ ++ sev_common->kvm_type = -1; ++ ++ sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev_common->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info common to sev/sev-es/sev-snp */ ++static const TypeInfo sev_common_info = { ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .name = TYPE_SEV_COMMON, ++ .instance_size = sizeof(SevCommonState), ++ .instance_init = sev_common_instance_init, ++ .class_size = sizeof(SevCommonStateClass), ++ .class_init = sev_common_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static char * ++sev_guest_get_dh_cert_file(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_GUEST(obj)->dh_cert_file); ++} ++ ++static void ++sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->dh_cert_file = g_strdup(value); ++} ++ ++static char * ++sev_guest_get_session_file(Object *obj, Error **errp) ++{ ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ return sev_guest->session_file ? 
g_strdup(sev_guest->session_file) : NULL; ++} ++ ++static void ++sev_guest_set_session_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->session_file = g_strdup(value); ++} ++ ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +@@ -1387,11 +1461,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); +@@ -1402,41 +1471,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) + static void + sev_guest_instance_init(Object *obj) + { +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->kvm_type = -1; ++ SevGuestState *sev_guest = SEV_GUEST(obj); + +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ sev_guest->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, + OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- 
object_property_add_uint32_ptr(obj, "reduced-phys-bits", +- &sev->reduced_phys_bits, ++ object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); + } + +-/* sev guest info */ ++/* guest info specific sev/sev-es */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } ++ .class_init = sev_guest_class_init, + }; + + static void + sev_register_types(void) + { ++ type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 9e10d09539..668374eef3 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -20,6 +20,9 @@ + + #include "exec/confidential-guest-support.h" + ++#define TYPE_SEV_COMMON "sev-common" ++#define TYPE_SEV_GUEST "sev-guest" ++ + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 + #define SEV_POLICY_ES 0x4 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch new file mode 100644 index 0000000..b347bf6 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch @@ -0,0 +1,530 @@ +From 900859fd3445b9a71f1a9a8befda17f0c33f3923 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:19 -0500 +Subject: [PATCH 059/100] i386/sev: Introduce 'sev-snp-guest' object + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [59/91] 3e585113d209176c2b97ad5e4fe943f19dfdcaeb (bonzini/rhel-qemu-kvm) + +SEV-SNP support relies on 
a different set of properties/state than the +existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' +object, which can be used to configure an SEV-SNP guest. For example, +a default-configured SEV-SNP guest with no additional information +passed in for use with attestation: + + -object sev-snp-guest,id=sev0 + +or a fully-specified SEV-SNP guest where all spec-defined binary +blobs are passed in as base64-encoded strings: + + -object sev-snp-guest,id=sev0, \ + policy=0x30000, \ + init-flags=0, \ + id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ + id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ + author-key-enabled=on, \ + host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ + guest-visible-workarounds=AA==, \ + +See the QAPI schema updates included in this patch for more usage +details. + +In some cases these blobs may be up to 4096 characters, but this is +generally well below the default limit for linux hosts where +command-line sizes are defined by the sysconf-configurable ARG_MAX +value, which defaults to 2097152 characters for Ubuntu hosts, for +example. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Acked-by: Markus Armbruster (for QAPI schema) +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7b34df44260b391e33bc3acf1ced30019d9aadf1) +Signed-off-by: Paolo Bonzini +--- + docs/system/i386/amd-memory-encryption.rst | 70 +++++- + qapi/qom.json | 58 +++++ + target/i386/sev.c | 253 +++++++++++++++++++++ + target/i386/sev.h | 1 + + 4 files changed, 380 insertions(+), 2 deletions(-) + +diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst +index e9bc142bc1..748f5094ba 100644 +--- a/docs/system/i386/amd-memory-encryption.rst ++++ b/docs/system/i386/amd-memory-encryption.rst +@@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs + are about to occur. This allows the guest to selectively share information with + the hypervisor to satisfy the requested function. + +-Launching +---------- ++Launching (SEV and SEV-ES) ++-------------------------- + + Boot images (such as bios) must be encrypted before a guest can be booted. The + ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, +@@ -161,6 +161,72 @@ The value of GCTX.LD is + If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for + ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. + ++Launching (SEV-SNP) ++------------------- ++Boot images (such as bios) must be encrypted before a guest can be booted. The ++``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ++``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These ++three commands communicate with SEV-SNP firmware to generate a fresh memory ++encryption key for the VM, encrypt the boot images for a successful launch. 
For ++more details on the SEV-SNP firmware interfaces used by these commands please ++see the SEV-SNP Firmware ABI. ++ ++``SNP_LAUNCH_START`` is called first to create a cryptographic launch context ++within the firmware. To create this context, the guest owner must provide a ++guest policy and other parameters as described in the SEV-SNP firmware ++specification. The launch parameters should be specified as described in the ++QAPI schema for the sev-snp-guest object. ++ ++The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------+-------+----------+-------------------------------------------------+ ++| key | type | default | meaning | +++---------------------------+-------------------------------------------------+ ++| policy | hex | 0x30000 | a 64-bit guest policy | +++---------------------------+-------------------------------------------------+ ++| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| ++| | | | for guest OS visible | ++| | | | workarounds. | +++---------------------------+-------------------------------------------------+ ++ ++``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context ++created via the ``SNP_LAUNCH_START`` command. If required, this command can be ++called multiple times to encrypt different memory regions. The command also ++calculates the measurement of the memory contents as it encrypts. ++ ++``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while ++finalizing the launch the firmware can perform checks on the launch digest ++computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must ++supply the id block, authentication blob and host data that should be included ++in the attestation report. See the SEV-SNP spec for further details. 
++ ++The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------------------+-------+----------+-------------------------------------+ ++| key | type | default | meaning | +++--------------------+-------+----------+-------------------------------------+ ++| id-block | string| none | base64 encoded ID block | +++--------------------+-------+----------+-------------------------------------+ ++| id-auth | string| none | base64 encoded authentication | ++| | | | information | +++--------------------+-------+----------+-------------------------------------+ ++| author-key-enabled | bool | 0 | auth block contains author key | +++--------------------+-------+----------+-------------------------------------+ ++| host_data | string| none | host provided data | +++--------------------+-------+----------+-------------------------------------+ ++ ++To launch a SEV-SNP guest (additional parameters are documented in the QAPI ++schema for the 'sev-snp-guest' object):: ++ ++ # ${QEMU} \ ++ -machine ...,confidential-guest-support=sev0 \ ++ -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 ++ ++ + Debugging + --------- + +diff --git a/qapi/qom.json b/qapi/qom.json +index 056b38f491..8bd299265e 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -929,6 +929,62 @@ + '*handle': 'uint32', + '*legacy-vm-type': 'bool' } } + ++## ++# @SevSnpGuestProperties: ++# ++# Properties for sev-snp-guest objects. Most of these are direct ++# arguments for the KVM_SNP_* interfaces documented in the Linux ++# kernel source under ++# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn ++# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands ++# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). ++# ++# More usage information is also available in the QEMU source tree ++# under docs/amd-memory-encryption. 
++# ++# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as ++# defined in the SEV-SNP firmware ABI (default: 0x30000) ++# ++# @guest-visible-workarounds: 16-byte, base64-encoded blob to report ++# hypervisor-defined workarounds, corresponding to the 'GOSVW' ++# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP ++# firmware ABI (default: all-zero) ++# ++# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' ++# structure for the SNP_LAUNCH_FINISH command defined in the ++# SEV-SNP firmware ABI (default: all-zero) ++# ++# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID ++# Authentication Information Structure' for the SNP_LAUNCH_FINISH ++# command defined in the SEV-SNP firmware ABI (default: all-zero) ++# ++# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' ++# field defined SEV-SNP firmware ABI (default: false) ++# ++# @host-data: 32-byte, base64-encoded, user-defined blob to provide to ++# the guest, as documented for the 'HOST_DATA' parameter of the ++# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: ++# all-zero) ++# ++# @vcek-disabled: Guests are by default allowed to choose between VLEK ++# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip ++# Endorsement Key) when requesting attestation reports from ++# firmware. Set this to true to disable the use of VCEK. 
++# (default: false) (since: 9.1) ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestProperties', ++ 'base': 'SevCommonProperties', ++ 'data': { ++ '*policy': 'uint64', ++ '*guest-visible-workarounds': 'str', ++ '*id-block': 'str', ++ '*id-auth': 'str', ++ '*author-key-enabled': 'bool', ++ '*host-data': 'str', ++ '*vcek-disabled': 'bool' } } ++ + ## + # @ThreadContextProperties: + # +@@ -1007,6 +1063,7 @@ + { 'name': 'secret_keyring', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest', ++ 'sev-snp-guest', + 'thread-context', + 's390-pv-guest', + 'throttle-group', +@@ -1077,6 +1134,7 @@ + 'secret_keyring': { 'type': 'SecretKeyringProperties', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest': 'SevGuestProperties', ++ 'sev-snp-guest': 'SevSnpGuestProperties', + 'thread-context': 'ThreadContextProperties', + 'throttle-group': 'ThrottleGroupProperties', + 'tls-creds-anon': 'TlsCredsAnonProperties', +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 28a018ed83..a81b3228d4 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -42,6 +42,7 @@ + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + + struct SevCommonState { + X86ConfidentialGuest parent_obj; +@@ -96,8 +97,22 @@ struct SevGuestState { + bool legacy_vm_type; + }; + ++struct SevSnpGuestState { ++ SevCommonState parent_obj; ++ ++ /* configuration parameters */ ++ char *guest_visible_workarounds; ++ char *id_block; ++ char *id_auth; ++ char *host_data; ++ ++ struct kvm_sev_snp_launch_start kvm_start_conf; ++ struct kvm_sev_snp_launch_finish kvm_finish_conf; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" ++#define DEFAULT_SEV_SNP_POLICY 0x30000 + + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { +@@ -1500,11 
+1515,249 @@ static const TypeInfo sev_guest_info = { + .class_init = sev_guest_class_init, + }; + ++static void ++sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static void ++sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static char * ++sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); ++} ++ ++static void ++sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, ++ Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->guest_visible_workarounds); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->guest_visible_workarounds = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, ++ -1, &len, errp); ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(start->gosvw)) { ++ error_setg(errp, "parameter length of %lu exceeds max of %lu", ++ len, sizeof(start->gosvw)); ++ return; ++ } ++ ++ memcpy(start->gosvw, blob, len); ++} ++ ++static char * ++sev_snp_guest_get_id_block(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_block); ++} ++ ++static void ++sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_block); ++ 
g_free((guchar *)finish->id_block_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_block = g_strdup(value); ++ ++ finish->id_block_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); ++ ++ if (!finish->id_block_uaddr) { ++ return; ++ } ++ ++ if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { ++ error_setg(errp, "parameter length of %lu not equal to %u", ++ len, KVM_SEV_SNP_ID_BLOCK_SIZE); ++ return; ++ } ++ ++ finish->id_block_en = (len) ? 1 : 0; ++} ++ ++static char * ++sev_snp_guest_get_id_auth(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_auth); ++} ++ ++static void ++sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_auth); ++ g_free((guchar *)finish->id_auth_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_auth = g_strdup(value); ++ ++ finish->id_auth_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); ++ ++ if (!finish->id_auth_uaddr) { ++ return; ++ } ++ ++ if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { ++ error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", ++ len, KVM_SEV_SNP_ID_AUTH_SIZE); ++ return; ++ } ++} ++ ++static bool ++sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.auth_key_en; ++} ++ ++static void ++sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.auth_key_en = value; ++} ++ ++static bool ++sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) ++{ ++ 
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; ++} ++ ++static void ++sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.vcek_disabled = value; ++} ++ ++static char * ++sev_snp_guest_get_host_data(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->host_data); ++} ++ ++static void ++sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->host_data); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->host_data = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); ++ ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(finish->host_data)) { ++ error_setg(errp, "parameter length of %lu not equal to %lu", ++ len, sizeof(finish->host_data)); ++ return; ++ } ++ ++ memcpy(finish->host_data, blob, len); ++} ++ ++static void ++sev_snp_guest_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add(oc, "policy", "uint64", ++ sev_snp_guest_get_policy, ++ sev_snp_guest_set_policy, NULL, NULL); ++ object_class_property_add_str(oc, "guest-visible-workarounds", ++ sev_snp_guest_get_guest_visible_workarounds, ++ sev_snp_guest_set_guest_visible_workarounds); ++ object_class_property_add_str(oc, "id-block", ++ sev_snp_guest_get_id_block, ++ sev_snp_guest_set_id_block); ++ object_class_property_add_str(oc, "id-auth", ++ sev_snp_guest_get_id_auth, ++ sev_snp_guest_set_id_auth); ++ object_class_property_add_bool(oc, "author-key-enabled", ++ sev_snp_guest_get_author_key_enabled, ++ 
sev_snp_guest_set_author_key_enabled); ++ object_class_property_add_bool(oc, "vcek-required", ++ sev_snp_guest_get_vcek_disabled, ++ sev_snp_guest_set_vcek_disabled); ++ object_class_property_add_str(oc, "host-data", ++ sev_snp_guest_get_host_data, ++ sev_snp_guest_set_host_data); ++} ++ ++static void ++sev_snp_guest_instance_init(Object *obj) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ /* default init/start/finish params for kvm */ ++ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; ++} ++ ++/* guest info specific to sev-snp */ ++static const TypeInfo sev_snp_guest_info = { ++ .parent = TYPE_SEV_COMMON, ++ .name = TYPE_SEV_SNP_GUEST, ++ .instance_size = sizeof(SevSnpGuestState), ++ .class_init = sev_snp_guest_class_init, ++ .instance_init = sev_snp_guest_instance_init, ++}; ++ + static void + sev_register_types(void) + { + type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); ++ type_register_static(&sev_snp_guest_info); + } + + type_init(sev_register_types); +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 668374eef3..bedc667eeb 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -22,6 +22,7 @@ + + #define TYPE_SEV_COMMON "sev-common" + #define TYPE_SEV_GUEST "sev-guest" ++#define TYPE_SEV_SNP_GUEST "sev-snp-guest" + + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch new file mode 100644 index 0000000..265da66 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch @@ -0,0 +1,85 @@ +From be37914ae54c8aebc218cf41b37bc0ea1563daae Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:51:44 +0200 +Subject: [PATCH 074/100] i386/sev: Invoke launch_updata_data() for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [74/91] f1b588a9ffecd6944a78186d88a6be3849698710 (bonzini/rhel-qemu-kvm) + +Add launch_update_data() in SevCommonStateClass and +invoke as sev_launch_update_data() for SEV object. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9861405a8f845133b7984322c2df0c43a45553c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7b5c4b4874..8834cf9441 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -74,6 +74,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + +@@ -929,7 +930,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -1487,6 +1488,7 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass 
*klass = SEV_COMMON_GET_CLASS(sev_common); + + if (!sev_common) { + return 0; +@@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); ++ int ret; ++ ++ ret = klass->launch_update_data(sev_common, gpa, ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->launch_update_data = sev_launch_update_data; + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch new file mode 100644 index 0000000..f28004d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch @@ -0,0 +1,55 @@ +From 32899eb4fa5143b795b107de4857adce2cf1d434 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:38 -0500 +Subject: [PATCH 075/100] i386/sev: Invoke launch_updata_data() for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [75/91] 3520af5847f8dddb6d7fe7ad5feb308230f387b9 (bonzini/rhel-qemu-kvm) + +Invoke as sev_snp_launch_update_data() for SNP object. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0765d136eba400ad1cb7cae18438bb10eace64dc) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 8834cf9441..eaf5fc6c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, + return 0; + } + ++static int ++sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ++ uint8_t *ptr, uint64_t len) ++{ ++ int ret = snp_launch_update_data(gpa, ptr, len, ++ KVM_SEV_SNP_PAGE_TYPE_NORMAL); ++ return ret; ++} ++ + static int + sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +@@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; ++ klass->launch_update_data = sev_snp_launch_update_data; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch new file mode 100644 index 0000000..e38615b --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch @@ -0,0 +1,47 @@ +From fa6076291eb45255bc2fe523399d7d0647fc5570 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:10 -0500 +Subject: [PATCH 085/100] i386/sev: Move SEV_COMMON null check before + dereferencing + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [85/91] e8d2bfd077766a5e7777b9337d0e77146f883224 (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546886. 
+ +Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 48779faef3c8e2fe70bd8285bffa731bd76dc844) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7c9df621de..f18432f58e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1529,11 +1529,12 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); ++ SevCommonStateClass *klass; + + if (!sev_common) { + return 0; + } ++ klass = SEV_COMMON_GET_CLASS(sev_common); + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch new file mode 100644 index 0000000..250a723 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch @@ -0,0 +1,88 @@ +From 4d96ca893126d4c17c9fe03c76973b1d4a414f21 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:18 -0500 +Subject: [PATCH 058/100] i386/sev: Move sev_launch_finish to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [58/91] 7865710d320a6df7038ef7016d350aa9cdcea326 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch finish is handled compared to the existing +sev-guest object. 
Move sev_launch_finish() to a class method to make it +easier to implement SNP-specific launch update functionality later. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bce615a14aec07cab0488e5a242f6a91e641efcb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b2aa0d6f99..28a018ed83 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -71,6 +71,7 @@ struct SevCommonStateClass { + + /* public */ + int (*launch_start)(SevCommonState *sev_common); ++ void (*launch_finish)(SevCommonState *sev_common); + }; + + /** +@@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev_guest) ++sev_launch_finish(SevCommonState *sev_common) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", +@@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest) + exit(1); + } + +- sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -826,10 +827,11 @@ static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { + SevCommonState *sev_common = opaque; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); + + if (running) { + if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { +- sev_launch_finish(SEV_GUEST(sev_common)); ++ klass->launch_finish(sev_common); + } + } + } +@@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + 
klass->launch_start = sev_launch_start; ++ klass->launch_finish = sev_launch_finish; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch new file mode 100644 index 0000000..12824ec --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch @@ -0,0 +1,91 @@ +From a170ba2c7dbf2775eb9047779d3643a2a81bb372 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:17 -0500 +Subject: [PATCH 057/100] i386/sev: Move sev_launch_update to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [57/91] 4f31e7afaec6f2c2a7c06cda4d7d27d4037e53e0 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch data is handled compared to the existing +sev-guest object. Move sev_launch_start() to a class method to make it +easier to implement SNP-specific launch update functionality later. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6600f1ac0c81cbe67faf048ea07f78542dea925f) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 33e606eea0..b2aa0d6f99 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -69,6 +69,8 @@ struct SevCommonState { + struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + ++ /* public */ ++ int (*launch_start)(SevCommonState *sev_common); + }; + + /** +@@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev_guest) ++sev_launch_start(SevCommonState *sev_common) + { + gsize sz; + int ret = 1; + int fw_error, rc; ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + struct kvm_sev_launch_start start = { + .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; +- SevCommonState *sev_common = SEV_COMMON(sev_guest); + + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { +@@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + ret = ram_block_discard_disable(true); + if (ret) { +@@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- sev_launch_start(SEV_GUEST(sev_common)); ++ ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) + static void + 
sev_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->launch_start = sev_launch_start; ++ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch new file mode 100644 index 0000000..746317d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch @@ -0,0 +1,134 @@ +From d009fa2cebebd1da80f4f2f5d0c4fffb87e02afc Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:34 -0500 +Subject: [PATCH 079/100] i386/sev: Reorder struct declarations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [79/91] 1274d4620e88dda99ec10173ca5e3cd4184c8fb6 (bonzini/rhel-qemu-kvm) + +Move the declaration of PaddedSevHashTable before SevSnpGuest so +we can add a new such field to the latter. + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cc483bf911931f405dea682c74a3d8b9b6c54369) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ + 1 file changed, 42 insertions(+), 42 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 73f9406715..3fce4c08eb 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + ++/* hard code sha256 digest size */ ++#define HASH_SIZE 32 ++ ++typedef struct QEMU_PACKED SevHashTableEntry { ++ QemuUUID guid; ++ uint16_t len; ++ uint8_t hash[HASH_SIZE]; ++} SevHashTableEntry; ++ ++typedef struct QEMU_PACKED SevHashTable { ++ QemuUUID guid; ++ uint16_t len; ++ SevHashTableEntry cmdline; ++ SevHashTableEntry initrd; ++ SevHashTableEntry kernel; ++} SevHashTable; ++ ++/* ++ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of ++ * 16 bytes. 
++ */ ++typedef struct QEMU_PACKED PaddedSevHashTable { ++ SevHashTable ht; ++ uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; ++} PaddedSevHashTable; ++ ++QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); ++ ++#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" ++typedef struct __attribute__((__packed__)) SevInfoBlock { ++ /* SEV-ES Reset Vector Address */ ++ uint32_t reset_addr; ++} SevInfoBlock; ++ ++#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" ++typedef struct QEMU_PACKED SevHashTableDescriptor { ++ /* SEV hash table area guest address */ ++ uint32_t base; ++ /* SEV hash table area size (in bytes) */ ++ uint32_t size; ++} SevHashTableDescriptor; ++ + struct SevCommonState { + X86ConfidentialGuest parent_obj; + +@@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData { + + static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + +-#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +-typedef struct __attribute__((__packed__)) SevInfoBlock { +- /* SEV-ES Reset Vector Address */ +- uint32_t reset_addr; +-} SevInfoBlock; +- +-#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +-typedef struct QEMU_PACKED SevHashTableDescriptor { +- /* SEV hash table area guest address */ +- uint32_t base; +- /* SEV hash table area size (in bytes) */ +- uint32_t size; +-} SevHashTableDescriptor; +- +-/* hard code sha256 digest size */ +-#define HASH_SIZE 32 +- +-typedef struct QEMU_PACKED SevHashTableEntry { +- QemuUUID guid; +- uint16_t len; +- uint8_t hash[HASH_SIZE]; +-} SevHashTableEntry; +- +-typedef struct QEMU_PACKED SevHashTable { +- QemuUUID guid; +- uint16_t len; +- SevHashTableEntry cmdline; +- SevHashTableEntry initrd; +- SevHashTableEntry kernel; +-} SevHashTable; +- +-/* +- * Data encrypted by sev_encrypt_flash() must be padded to a multiple of +- * 16 bytes. 
+- */ +-typedef struct QEMU_PACKED PaddedSevHashTable { +- SevHashTable ht; +- uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +-} PaddedSevHashTable; +- +-QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); +- + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch new file mode 100644 index 0000000..ba66cde --- /dev/null +++ b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch @@ -0,0 +1,46 @@ +From 80c1d78e31b2567d1c610c8939b75d159ff6ea27 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:13 -0500 +Subject: [PATCH 055/100] i386/sev: Replace error_report with error_setg + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [55/91] 1e15fc2458687e564af9fa5022c29e79ddc8edfd (bonzini/rhel-qemu-kvm) + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index d30b68c11e..67ed32e5ea 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +- error_report("%s: SEV-ES guests require in-kernel irqchip support", +- __func__); ++ error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" ++ "support", __func__); + goto err; + } + + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { +- error_report("%s: guest policy requires SEV-ES, but 
" ++ error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch new file mode 100644 index 0000000..6fc68aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch @@ -0,0 +1,40 @@ +From 88da6d01b1de2b92adb5c47c6d482876a054705f Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:11 -0500 +Subject: [PATCH 086/100] i386/sev: Return when sev_common is null + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [86/91] 02ce4a6a51ce9fd961f417c13db0a760673591ba (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546885. + +Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cd7093a7a168a823d07671348996f049d45e8f67) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f18432f58e..c40562dce3 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); ++ return NULL; + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch new file mode 100644 index 0000000..8548e22 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch @@ -0,0 +1,47 @@ +From c7649ac1b958dc48de50f32b1ad80d84b17945a8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:29 -0500 +Subject: [PATCH 069/100] i386/sev: Set CPU state to protected once SNP guest + payload is finalized + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [69/91] 09280f987a186511ec7d62c3f340b2148e8556d7 (bonzini/rhel-qemu-kvm) + +Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the +vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU +state afterward will only be acting on the initial data and so are +effectively no-ops. + +Set the vCPU state to protected at this point so that QEMU don't +continue trying to re-sync vCPU data during guest runtime. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ef2e592ca7..e84e4395a5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) + exit(1); + } + ++ kvm_mark_guest_state_protected(); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..05ccb0a --- /dev/null +++ b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,268 @@ +From 5540bb5ca052531563df1ade68995e268ae65224 Mon Sep 17 00:00:00 2001 
+From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:36 -0500 +Subject: [PATCH 012/100] i386/sev: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [12/91] 6f5f8d1b818826f7ee4b6ae527963ef23c97f531 (bonzini/rhel-qemu-kvm) + +Use confidential_guest_kvm_init() instead of calling SEV +specific sev_kvm_init(). This allows the introduction of multiple +confidential-guest-support subclasses for different x86 vendors. + +As a bonus, stubs are not needed anymore since there is no +direct call from target/i386/kvm/kvm.c to SEV code. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 637c95b37b106c2eeba313e0abb38ec12e918a59) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 10 +-- + target/i386/kvm/meson.build | 2 - + target/i386/kvm/sev-stub.c | 21 ------ + target/i386/sev.c | 127 ++++++++++++++++++------------------ + target/i386/sev.h | 2 - + 5 files changed, 69 insertions(+), 93 deletions(-) + delete mode 100644 target/i386/kvm/sev-stub.c + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5f30b649a0..e271652620 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * mechanisms are supported in future (e.g. TDX), they'll need + * their own initialization either here or elsewhere. 
+ */ +- ret = sev_kvm_init(ms->cgs, &local_err); +- if (ret < 0) { +- error_report_err(local_err); +- return ret; ++ if (ms->cgs) { ++ ret = confidential_guest_kvm_init(ms->cgs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ return ret; ++ } + } + + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); +diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build +index 84d9143e60..e7850981e6 100644 +--- a/target/i386/kvm/meson.build ++++ b/target/i386/kvm/meson.build +@@ -7,8 +7,6 @@ i386_kvm_ss.add(files( + + i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) + +-i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) +- + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) + + i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +deleted file mode 100644 +index 1be5341e8a..0000000000 +--- a/target/i386/kvm/sev-stub.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-/* +- * QEMU SEV stub +- * +- * Copyright Advanced Micro Devices 2018 +- * +- * Authors: +- * Brijesh Singh +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#include "qemu/osdep.h" +-#include "sev.h" +- +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- /* If we get here, cgs must be some non-SEV thing */ +- return 0; +-} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 72930ff0dc..b8f79d34d1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + +-static void +-sev_guest_class_init(ObjectClass *oc, void *data) +-{ +- object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- sev_guest_set_sev_device); +- object_class_property_set_description(oc, "sev-device", +- "SEV device to use"); +- object_class_property_add_str(oc, "dh-cert-file", +- sev_guest_get_dh_cert_file, +- sev_guest_set_dh_cert_file); +- object_class_property_set_description(oc, "dh-cert-file", +- "guest owners DH certificate (encoded with base64)"); +- object_class_property_add_str(oc, "session-file", +- sev_guest_get_session_file, +- sev_guest_set_session_file); +- object_class_property_set_description(oc, "session-file", +- "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); +-} +- +-static void +-sev_guest_instance_init(Object *obj) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "reduced-phys-bits", 
+- &sev->reduced_phys_bits, +- OBJ_PROP_FLAG_READWRITE); +-} +- +-/* sev guest info */ +-static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, +- .name = TYPE_SEV_GUEST, +- .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, +- .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } +-}; +- + bool + sev_enabled(void) + { +@@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev +- = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); ++ SevGuestState *sev = SEV_GUEST(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; + +- if (!sev) { +- return 0; +- } +- + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); +@@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = sev_kvm_init; ++ ++ object_class_property_add_str(oc, "sev-device", ++ sev_guest_get_sev_device, ++ sev_guest_set_sev_device); ++ object_class_property_set_description(oc, "sev-device", ++ "SEV device to use"); ++ object_class_property_add_str(oc, "dh-cert-file", ++ sev_guest_get_dh_cert_file, ++ sev_guest_set_dh_cert_file); ++ object_class_property_set_description(oc, "dh-cert-file", ++ "guest owners DH certificate (encoded with base64)"); ++ object_class_property_add_str(oc, "session-file", ++ sev_guest_get_session_file, ++ sev_guest_set_session_file); ++ 
object_class_property_set_description(oc, "session-file", ++ "guest owners session parameters (encoded with base64)"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_guest_get_kernel_hashes, ++ sev_guest_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_guest_instance_init(Object *obj) ++{ ++ SevGuestState *sev = SEV_GUEST(obj); ++ ++ sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ sev->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "policy", &sev->policy, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info */ ++static const TypeInfo sev_guest_info = { ++ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .name = TYPE_SEV_GUEST, ++ .instance_size = sizeof(SevGuestState), ++ .instance_finalize = sev_guest_finalize, ++ .class_init = sev_guest_class_init, ++ .instance_init = sev_guest_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ + static void + sev_register_types(void) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e7499c95b1..9e10d09539 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +- + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch 
new file mode 100644 index 0000000..27852d5 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch @@ -0,0 +1,240 @@ +From a870e7c31d9605baea4741d82521612b6164c99b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:26 -0500 +Subject: [PATCH 066/100] i386/sev: Update query-sev QAPI format to handle + SEV-SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [66/91] a19b3e226e857f3995176e7d2ef1ce2e4329a885 (bonzini/rhel-qemu-kvm) + +Most of the current 'query-sev' command is relevant to both legacy +SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: + + - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and + the meaning of the bit positions has changed + - 'handle' is not relevant to SEV-SNP + +To address this, this patch adds a new 'sev-type' field that can be +used as a discriminator to select between SEV and SEV-SNP-specific +fields/formats without breaking compatibility for existing management +tools (so long as management tools that add support for launching +SEV-SNP guest update their handling of query-sev appropriately). + +The corresponding HMP command has also been fixed up similarly. 
+ +Signed-off-by: Michael Roth +Co-developed-by:Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 59d3740cb4ac0f010ce35877572904f6297284b4) +Signed-off-by: Paolo Bonzini +--- + qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- + target/i386/sev.c | 55 +++++++++++++++++++++------------ + target/i386/sev.h | 3 ++ + 3 files changed, 96 insertions(+), 34 deletions(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 4e0a6492a9..2d7d4d89bd 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -47,6 +47,50 @@ + 'send-update', 'receive-update' ], + 'if': 'TARGET_I386' } + ++## ++# @SevGuestType: ++# ++# An enumeration indicating the type of SEV guest being run. ++# ++# @sev: The guest is a legacy SEV or SEV-ES guest. ++# ++# @sev-snp: The guest is an SEV-SNP guest. ++# ++# Since: 6.2 ++## ++{ 'enum': 'SevGuestType', ++ 'data': [ 'sev', 'sev-snp' ], ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevGuestInfo: ++# ++# Information specific to legacy SEV/SEV-ES guests. ++# ++# @policy: SEV policy value ++# ++# @handle: SEV firmware handle ++# ++# Since: 2.12 ++## ++{ 'struct': 'SevGuestInfo', ++ 'data': { 'policy': 'uint32', ++ 'handle': 'uint32' }, ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevSnpGuestInfo: ++# ++# Information specific to SEV-SNP guests. 
++# ++# @snp-policy: SEV-SNP policy value ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestInfo', ++ 'data': { 'snp-policy': 'uint64' }, ++ 'if': 'TARGET_I386' } ++ + ## + # @SevInfo: + # +@@ -60,25 +104,25 @@ + # + # @build-id: SEV FW build id + # +-# @policy: SEV policy value +-# + # @state: SEV guest state + # +-# @handle: SEV firmware handle ++# @sev-type: Type of SEV guest being run + # + # Since: 2.12 + ## +-{ 'struct': 'SevInfo', +- 'data': { 'enabled': 'bool', +- 'api-major': 'uint8', +- 'api-minor' : 'uint8', +- 'build-id' : 'uint8', +- 'policy' : 'uint32', +- 'state' : 'SevState', +- 'handle' : 'uint32' +- }, +- 'if': 'TARGET_I386' +-} ++{ 'union': 'SevInfo', ++ 'base': { 'enabled': 'bool', ++ 'api-major': 'uint8', ++ 'api-minor' : 'uint8', ++ 'build-id' : 'uint8', ++ 'state' : 'SevState', ++ 'sev-type' : 'SevGuestType' }, ++ 'discriminator': 'sev-type', ++ 'data': { ++ 'sev': 'SevGuestInfo', ++ 'sev-snp': 'SevSnpGuestInfo' }, ++ 'if': 'TARGET_I386' } ++ + + ## + # @query-sev: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 072cc4f853..43d1c48bd9 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void) + { + SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevGuestState *sev_guest = +- (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), +- TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- if (sev_guest) { +- info->handle = sev_guest->handle; +- } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; +- /* we only report the lower 32-bits of policy for SNP, ok for now... 
*/ +- info->policy = +- (uint32_t)object_property_get_uint(OBJECT(sev_common), +- "policy", NULL); ++ ++ if (sev_snp_enabled()) { ++ info->sev_type = SEV_GUEST_TYPE_SEV_SNP; ++ info->u.sev_snp.snp_policy = ++ object_property_get_uint(OBJECT(sev_common), "policy", NULL); ++ } else { ++ info->sev_type = SEV_GUEST_TYPE_SEV; ++ info->u.sev.handle = SEV_GUEST(sev_common)->handle; ++ info->u.sev.policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); ++ } + } + + return info; +@@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + SevInfo *info = sev_get_info(); + +- if (info && info->enabled) { +- monitor_printf(mon, "handle: %d\n", info->handle); +- monitor_printf(mon, "state: %s\n", SevState_str(info->state)); +- monitor_printf(mon, "build: %d\n", info->build_id); +- monitor_printf(mon, "api version: %d.%d\n", +- info->api_major, info->api_minor); ++ if (!info || !info->enabled) { ++ monitor_printf(mon, "SEV is not enabled\n"); ++ goto out; ++ } ++ ++ monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); ++ monitor_printf(mon, "state: %s\n", SevState_str(info->state)); ++ monitor_printf(mon, "build: %d\n", info->build_id); ++ monitor_printf(mon, "api version: %d.%d\n", info->api_major, ++ info->api_minor); ++ ++ if (sev_snp_enabled()) { + monitor_printf(mon, "debug: %s\n", +- info->policy & SEV_POLICY_NODBG ? "off" : "on"); +- monitor_printf(mon, "key-sharing: %s\n", +- info->policy & SEV_POLICY_NOKS ? "off" : "on"); ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" ++ : "off"); ++ monitor_printf(mon, "SMT allowed: %s\n", ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" ++ : "off"); + } else { +- monitor_printf(mon, "SEV is not enabled\n"); ++ monitor_printf(mon, "handle: %d\n", info->u.sev.handle); ++ monitor_printf(mon, "debug: %s\n", ++ info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); ++ monitor_printf(mon, "key-sharing: %s\n", ++ info->u.sev.policy & SEV_POLICY_NOKS ? 
"off" : "on"); + } + ++out: + qapi_free_SevInfo(info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 94295ee74f..5dc4767b1e 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -31,6 +31,9 @@ + #define SEV_POLICY_DOMAIN 0x10 + #define SEV_POLICY_SEV 0x20 + ++#define SEV_SNP_POLICY_SMT 0x10000 ++#define SEV_SNP_POLICY_DBG 0x80000 ++ + typedef struct SevKernelLoaderContext { + char *setup_data; + size_t setup_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch new file mode 100644 index 0000000..56f9f6f --- /dev/null +++ b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch @@ -0,0 +1,51 @@ +From 98057e3adafa052b21a4fe5ef22835d30df3e644 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:09 -0500 +Subject: [PATCH 084/100] i386/sev: fix unreachable code coverity issue + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [84/91] dc7bf28f491bf675b22a98ea593fba72d8bc415a (bonzini/rhel-qemu-kvm) + +Set 'finish->id_block_en' early, so that it is properly reset. + +Fixes coverity CID 1546887. 
+ +Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 004c667ac1..7c9df621de 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2165,6 +2165,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + ++ finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + +@@ -2184,7 +2185,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + return; + } + +- finish->id_block_en = (len) ? 1 : 0; ++ finish->id_block_en = 1; + } + + static char * +-- +2.39.3 + diff --git a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch deleted file mode 100644 index 0cf782e..0000000 --- a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 17 Jan 2024 21:08:59 +0100 -Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init() - mis-assignment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions -RH-Jira: RHEL-21570 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9) - -JIRA: https://issues.redhat.com/browse/RHEL-21570 - -commit 
9d5b42beb6978dc6219d5dc029c9d453c6b8d503 -Author: Elen Avan -Date: Fri Dec 22 22:17:21 2023 +0300 - - include/ui/rect.h: fix qemu_rect_init() mis-assignment - - Signed-off-by: Elen Avan - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 - Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" - Cc: qemu-stable@nongnu.org - Reviewed-by: Michael Tokarev - Reviewed-by: Marc-André Lureau - Signed-off-by: Michael Tokarev - -Signed-off-by: Thomas Huth ---- - include/ui/rect.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/ui/rect.h b/include/ui/rect.h -index 94898f92d0..68f05d78a8 100644 ---- a/include/ui/rect.h -+++ b/include/ui/rect.h -@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect, - uint16_t width, uint16_t height) - { - rect->x = x; -- rect->y = x; -+ rect->y = y; - rect->width = width; - rect->height = height; - } --- -2.39.3 - diff --git a/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch new file mode 100644 index 0000000..9a17dda --- /dev/null +++ b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch @@ -0,0 +1,81 @@ +From deae6c3b57c3919946a5ce1613e667a3240cf158 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 15 Apr 2024 12:45:09 +0200 +Subject: [PATCH 001/100] introduce pc_rhel_9_5_compat + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [1/91] cfd402fa5080eddba7c954e81ed79f9a1dd654cf (bonzini/rhel-qemu-kvm) + +Allow undoing backported changes that impact guest ABI. 
+ +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 4 ++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 11 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4a154c1a9a..648762d908 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -348,6 +348,10 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_5_compat[] = { ++}; ++const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); ++ + GlobalProperty pc_rhel_9_3_compat[] = { + /* pc_rhel_9_3_compat from pc_compat_8_0 */ + { "virtio-mem", "unplugged-inaccessible", "auto" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 6b260682eb..bef3e8b73e 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,6 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..9adcdadce8 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,8 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index a984c951ad..87420783ab 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -294,6 +294,9 @@ extern const size_t pc_compat_2_0_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t 
pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_5_compat[]; ++extern const size_t pc_rhel_9_5_compat_len; ++ + extern GlobalProperty pc_rhel_9_3_compat[]; + extern const size_t pc_rhel_9_3_compat_len; + +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch index 0e2fa11..137cb53 100644 --- a/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +++ b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch @@ -1,35 +1,27 @@ -From c2eafeb32a256cbafb0e65c0380acb478181326e Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 5 Jun 2024 19:56:51 -0400 +From 16c2e9e339a4c83055fd39e032fa16a0e732ed17 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 Subject: [PATCH 2/4] iotests/244: Don't store data-file with protocol in image -RH-Author: Jon Maloy -RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) -RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 RH-CVE: CVE-2024-4467 RH-Acked-by: Kevin Wolf RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [2/4] ddef095945aa55bb0aacc2a2cb58f9e12ad20d5e - -commit 92e00dab8be1570b13172353d77d2af44cb4e22b -Author: Kevin Wolf -Date: Thu Apr 25 14:49:40 2024 +0200 - - iotests/244: Don't store data-file with protocol in image - - We want to disable filename parsing for data files because it's too easy - to abuse in malicious image files. Make the test ready for the change by - passing the data file explicitly in command line options. 
- - Signed-off-by: Kevin Wolf - Reviewed-by: Eric Blake - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Hanna Czenczek - Upstream: N/A, embargoed - Signed-off-by: Hanna Czenczek - -Signed-off-by: Jon Maloy +RH-Acked-by: Eric Blake +RH-Commit: [2/4] 92e00dab8be1570b13172353d77d2af44cb4e22b + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek --- tests/qemu-iotests/244 | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch index caeac22..2c1d6ae 100644 --- a/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +++ b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch @@ -1,36 +1,28 @@ -From 931ab59f39b5e3551b328fe5b0f872df7a19ba05 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 5 Jun 2024 19:56:51 -0400 +From d70daa2eb5b670513ccd36c0baa5b36ec8cef666 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 Subject: [PATCH 3/4] iotests/270: Don't store data-file with json: prefix in image -RH-Author: Jon Maloy -RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) -RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 RH-CVE: CVE-2024-4467 RH-Acked-by: Kevin Wolf RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [3/4] 7a9844fd48e3f3c4d1711ea4fb671c795ca4a1c1 +RH-Acked-by: Eric Blake +RH-Commit: [3/4] 705bcc2819ce8e0f8b9d660a93bc48de26413aec -commit 705bcc2819ce8e0f8b9d660a93bc48de26413aec 
-Author: Kevin Wolf -Date: Thu Apr 25 14:49:40 2024 +0200 +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. - iotests/270: Don't store data-file with json: prefix in image - - We want to disable filename parsing for data files because it's too easy - to abuse in malicious image files. Make the test ready for the change by - passing the data file explicitly in command line options. - - Signed-off-by: Kevin Wolf - Reviewed-by: Eric Blake - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Hanna Czenczek - Upstream: N/A, embargoed - Signed-off-by: Hanna Czenczek - -Signed-off-by: Jon Maloy +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek --- tests/qemu-iotests/270 | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch b/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch deleted file mode 100644 index 3b2841f..0000000 --- a/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 65d58819ff7b012e43b5f1da1356b559d3f5a962 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 14 Mar 2024 17:58:25 +0100 -Subject: [PATCH 3/4] iotests: Add test for reset/AioContext switches with NBD - exports - -RH-Author: Kevin Wolf -RH-MergeRequest: 231: Fix deadlock and crash during storage migration -RH-Jira: RHEL-28125 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 7266acdf85271d157497bfe452aa6eeb58fbe7c6 (kmwolf/centos-qemu-kvm) - -This replicates the scenario in which the bug was reported. 
-Unfortunately this relies on actually executing a guest (so that the -firmware initialises the virtio-blk device and moves it to its -configured iothread), so this can't make use of the qtest accelerator -like most other test cases. I tried to find a different easy way to -trigger the bug, but couldn't find one. - -Signed-off-by: Kevin Wolf -Message-ID: <20240314165825.40261-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit e8fce34eccf68a32f4ecf2c6f121ff2ac383d6bf) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/tests/iothreads-nbd-export | 66 +++++++++++++++++++ - .../tests/iothreads-nbd-export.out | 19 ++++++ - 2 files changed, 85 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iothreads-nbd-export - create mode 100644 tests/qemu-iotests/tests/iothreads-nbd-export.out - -diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export b/tests/qemu-iotests/tests/iothreads-nbd-export -new file mode 100755 -index 0000000000..037260729c ---- /dev/null -+++ b/tests/qemu-iotests/tests/iothreads-nbd-export -@@ -0,0 +1,66 @@ -+#!/usr/bin/env python3 -+# group: rw quick -+# -+# Copyright (C) 2024 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+# Creator/Owner: Kevin Wolf -+ -+import time -+import qemu -+import iotests -+ -+iotests.script_initialize(supported_fmts=['qcow2'], -+ supported_platforms=['linux']) -+ -+with iotests.FilePath('disk1.img') as path, \ -+ iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \ -+ qemu.machine.QEMUMachine(iotests.qemu_prog) as vm: -+ -+ img_size = '10M' -+ -+ iotests.log('Preparing disk...') -+ iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size) -+ vm.add_args('-blockdev', f'file,node-name=disk-file,filename={path}') -+ vm.add_args('-blockdev', 'qcow2,node-name=disk,file=disk-file') -+ vm.add_args('-object', 'iothread,id=iothread0') -+ vm.add_args('-device', -+ 'virtio-blk,drive=disk,iothread=iothread0,share-rw=on') -+ -+ iotests.log('Launching VM...') -+ vm.add_args('-accel', 'kvm', '-accel', 'tcg') -+ #vm.add_args('-accel', 'qtest') -+ vm.launch() -+ -+ iotests.log('Exporting to NBD...') -+ iotests.log(vm.qmp('nbd-server-start', -+ addr={'type': 'unix', 'data': {'path': nbd_sock}})) -+ iotests.log(vm.qmp('block-export-add', type='nbd', id='exp0', -+ node_name='disk', writable=True)) -+ -+ iotests.log('Connecting qemu-img...') -+ qemu_io = iotests.QemuIoInteractive('-f', 'raw', -+ f'nbd+unix:///disk?socket={nbd_sock}') -+ -+ iotests.log('Moving the NBD export to a different iothread...') -+ for i in range(0, 10): -+ iotests.log(vm.qmp('system_reset')) -+ time.sleep(0.1) -+ -+ iotests.log('Checking that it is still alive...') -+ iotests.log(vm.qmp('query-status')) -+ -+ qemu_io.close() -+ vm.shutdown() -diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export.out b/tests/qemu-iotests/tests/iothreads-nbd-export.out -new file mode 100644 -index 0000000000..bc514e35e5 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iothreads-nbd-export.out -@@ -0,0 +1,19 @@ -+Preparing disk... -+Launching VM... -+Exporting to NBD... -+{"return": {}} -+{"return": {}} -+Connecting qemu-img... -+Moving the NBD export to a different iothread... 
-+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+Checking that it is still alive... -+{"return": {"running": true, "status": "running"}} --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch deleted file mode 100644 index d9072f5..0000000 --- a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:21 -0500 -Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm) - -Add a filter function for QMP responses that contain QEMU's -automatically generated node ids. The ids change between runs and must -be masked in the reference output. - -The next commit will use this new function. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/iotests.py | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index e5c5798c71..ea48af4a7b 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -651,6 +651,13 @@ def _filter(_key, value): - def filter_generated_node_ids(msg): - return re.sub("#block[0-9]+", "NODE_NAME", msg) - -+def filter_qmp_generated_node_ids(qmsg): -+ def _filter(_key, value): -+ if is_str(value): -+ return filter_generated_node_ids(value) -+ return value -+ return filter_qmp(qmsg, _filter) -+ - def filter_img_info(output: str, filename: str, - drop_child_info: bool = True) -> str: - lines = [] --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch deleted file mode 100644 index ab63004..0000000 --- a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 25 Jan 2024 16:21:50 +0100 -Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm) - -Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but -in older versions it's not. 
We really have to catch asyncio.TimeoutError -here, otherwise a slow test run will fail (as has happened multiple -times on CI recently). - -Signed-off-by: Kevin Wolf -Message-ID: <20240125152150.42389-1-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/tests/iothreads-stream | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream -index 503f221f16..231195b5e8 100755 ---- a/tests/qemu-iotests/tests/iothreads-stream -+++ b/tests/qemu-iotests/tests/iothreads-stream -@@ -18,6 +18,7 @@ - # - # Creator/Owner: Kevin Wolf - -+import asyncio - import iotests - - iotests.script_initialize(supported_fmts=['qcow2'], -@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \ - # The test is done once both jobs are gone - if finished == 2: - break -- except TimeoutError: -+ except asyncio.TimeoutError: - pass - vm.cmd('query-jobs') --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch deleted file mode 100644 index 209bd1e..0000000 --- a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch +++ /dev/null @@ -1,592 +0,0 @@ -From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:22 -0500 -Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 (stefanha/centos-stream-qemu-kvm) - -The common.qemu bash functions allow tests to interact with the QMP -monitor of a QEMU process. 
I spent two days trying to update 141 when -the order of the test output changed, but found it would still fail -occassionally because printf() and QMP events race with synchronous QMP -communication. - -I gave up and ported 141 to the existing Python API for QMP tests. The -Python API is less affected by the order in which QEMU prints output -because it does not print all QMP traffic by default. - -The next commit changes the order in which QMP messages are received. -Make 141 reliable first. - -Cc: Hanna Czenczek -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/141 | 307 ++++++++++++++++--------------------- - tests/qemu-iotests/141.out | 200 ++++++------------------ - 2 files changed, 176 insertions(+), 331 deletions(-) - -diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 -index a37030ee17..a7d3985a02 100755 ---- a/tests/qemu-iotests/141 -+++ b/tests/qemu-iotests/141 -@@ -1,9 +1,12 @@ --#!/usr/bin/env bash -+#!/usr/bin/env python3 - # group: rw auto quick - # - # Test case for ejecting BDSs with block jobs still running on them - # --# Copyright (C) 2016 Red Hat, Inc. -+# Originally written in bash by Hanna Czenczek, ported to Python by Stefan -+# Hajnoczi. -+# -+# Copyright Red Hat - # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -19,177 +22,129 @@ - # along with this program. If not, see . - # - --# creator --owner=hreitz@redhat.com -- --seq="$(basename $0)" --echo "QA output created by $seq" -- --status=1 # failure is the default! 
-- --_cleanup() --{ -- _cleanup_qemu -- _cleanup_test_img -- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do -- _rm_test_img "$img" -- done --} --trap "_cleanup; exit \$status" 0 1 2 3 15 -- --# get standard environment, filters and checks --. ./common.rc --. ./common.filter --. ./common.qemu -- --# Needs backing file and backing format support --_supported_fmt qcow2 qed --_supported_proto file --_supported_os Linux -- -- --test_blockjob() --{ -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': '$IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': '$TEST_IMG' -- }}}" \ -- 'return' -- -- # If "$2" is an event, we may or may not see it before the -- # {"return": {}}. Therefore, filter the {"return": {}} out both -- # here and in the next command. (Naturally, if we do not see it -- # here, we will see it before the next command can be executed, -- # so it will appear in the next _send_qemu_cmd's output.) -- _send_qemu_cmd $QEMU_HANDLE \ -- "$1" \ -- "$2" \ -- | _filter_img_create | _filter_qmp_empty_return -- -- # We want this to return an error because the block job is still running -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}}" \ -- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return -- -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}}" \ -- "$3" -- -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}}" \ -- 'return' --} -- -- --TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M --TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M --_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT -- --_launch_qemu -nodefaults -- --_send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'qmp_capabilities'}" \ -- 'return' -- --echo --echo '=== Testing drive-backup ===' --echo -- --# drive-backup will not send 
BLOCK_JOB_READY by itself, and cancelling the job --# will consequently result in BLOCK_JOB_CANCELLED being emitted. -- --test_blockjob \ -- "{'execute': 'drive-backup', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'target': '$TEST_DIR/o.$IMGFMT', -- 'format': '$IMGFMT', -- 'sync': 'none'}}" \ -- 'return' \ -- '"status": "null"' -- --echo --echo '=== Testing drive-mirror ===' --echo -- --# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling --# the job will consequently result in BLOCK_JOB_COMPLETED being emitted. -- --test_blockjob \ -- "{'execute': 'drive-mirror', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'target': '$TEST_DIR/o.$IMGFMT', -- 'format': '$IMGFMT', -- 'sync': 'none'}}" \ -- 'BLOCK_JOB_READY' \ -- '"status": "null"' -- --echo --echo '=== Testing active block-commit ===' --echo -- --# An active block-commit will send BLOCK_JOB_READY basically immediately, and --# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being --# emitted. -- --test_blockjob \ -- "{'execute': 'block-commit', -- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \ -- 'BLOCK_JOB_READY' \ -- '"status": "null"' -- --echo --echo '=== Testing non-active block-commit ===' --echo -- --# Give block-commit something to work on, otherwise it would be done --# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just --# fine without the block job still running. -- --$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io -- --test_blockjob \ -- "{'execute': 'block-commit', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'top': '$TEST_DIR/m.$IMGFMT', -- 'speed': 1}}" \ -- 'return' \ -- '"status": "null"' -- --echo --echo '=== Testing block-stream ===' --echo -- --# Give block-stream something to work on, otherwise it would be done --# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just --# fine without the block job still running. 
-- --$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io -- --# With some data to stream (and @speed set to 1), block-stream will not complete --# until we send the block-job-cancel command. -- --test_blockjob \ -- "{'execute': 'block-stream', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'speed': 1}}" \ -- 'return' \ -- '"status": "null"' -- --_cleanup_qemu -- --# success, all done --echo "*** done" --rm -f $seq.full --status=0 -+import iotests -+ -+# Common filters to mask values that vary in the test output -+QMP_FILTERS = [iotests.filter_qmp_testfiles, \ -+ iotests.filter_qmp_imgfmt] -+ -+ -+class TestCase: -+ def __init__(self, name, vm, image_path, cancel_event): -+ self.name = name -+ self.vm = vm -+ self.image_path = image_path -+ self.cancel_event = cancel_event -+ -+ def __enter__(self): -+ iotests.log(f'=== Testing {self.name} ===') -+ self.vm.qmp_log('blockdev-add', \ -+ node_name='drv0', \ -+ driver=iotests.imgfmt, \ -+ file={'driver': 'file', 'filename': self.image_path}, \ -+ filters=QMP_FILTERS) -+ -+ def __exit__(self, *exc_details): -+ # This is expected to fail because the job still exists -+ self.vm.qmp_log('blockdev-del', node_name='drv0', \ -+ filters=[iotests.filter_qmp_generated_node_ids]) -+ -+ self.vm.qmp_log('block-job-cancel', device='job0') -+ event = self.vm.event_wait(self.cancel_event) -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # This time it succeeds -+ self.vm.qmp_log('blockdev-del', node_name='drv0') -+ -+ # Separate test cases in output -+ iotests.log('') -+ -+ -+def main() -> None: -+ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \ -+ (bottom_path, middle_path, top_path, target_path), \ -+ iotests.VM() as vm: -+ -+ iotests.log('Creating bottom <- middle <- top backing file chain...') -+ IMAGE_SIZE='1M' -+ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE) -+ iotests.qemu_img_create('-f', iotests.imgfmt, \ -+ '-F', iotests.imgfmt, \ -+ '-b', 
bottom_path, \ -+ middle_path, \ -+ IMAGE_SIZE) -+ iotests.qemu_img_create('-f', iotests.imgfmt, \ -+ '-F', iotests.imgfmt, \ -+ '-b', middle_path, \ -+ top_path, \ -+ IMAGE_SIZE) -+ -+ iotests.log('Starting VM...') -+ vm.add_args('-nodefaults') -+ vm.launch() -+ -+ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling -+ # the job will consequently result in BLOCK_JOB_CANCELLED being -+ # emitted. -+ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('drive-backup', \ -+ job_id='job0', \ -+ device='drv0', \ -+ target=target_path, \ -+ format=iotests.imgfmt, \ -+ sync='none', \ -+ filters=QMP_FILTERS) -+ -+ # drive-mirror will send BLOCK_JOB_READY basically immediately, and -+ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED -+ # being emitted. -+ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'): -+ vm.qmp_log('drive-mirror', \ -+ job_id='job0', \ -+ device='drv0', \ -+ target=target_path, \ -+ format=iotests.imgfmt, \ -+ sync='none', \ -+ filters=QMP_FILTERS) -+ event = vm.event_wait('BLOCK_JOB_READY') -+ assert event is not None # silence mypy -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # An active block-commit will send BLOCK_JOB_READY basically -+ # immediately, and cancelling the job will consequently result in -+ # BLOCK_JOB_COMPLETED being emitted. -+ with TestCase('active block-commit', vm, top_path, \ -+ 'BLOCK_JOB_COMPLETED'): -+ vm.qmp_log('block-commit', \ -+ job_id='job0', \ -+ device='drv0') -+ event = vm.event_wait('BLOCK_JOB_READY') -+ assert event is not None # silence mypy -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # Give block-commit something to work on, otherwise it would be done -+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would -+ # work just fine without the block job still running. 
-+ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}') -+ with TestCase('non-active block-commit', vm, top_path, \ -+ 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('block-commit', \ -+ job_id='job0', \ -+ device='drv0', \ -+ top=middle_path, \ -+ speed=1, \ -+ filters=[iotests.filter_qmp_testfiles]) -+ -+ # Give block-stream something to work on, otherwise it would be done -+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would -+ # work just fine without the block job still running. -+ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}') -+ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('block-stream', \ -+ job_id='job0', \ -+ device='drv0', \ -+ speed=1) -+ -+if __name__ == '__main__': -+ iotests.script_main(main, supported_fmts=['qcow2', 'qed'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 63203d9944..91b7ba50af 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -1,179 +1,69 @@ --QA output created by 141 --Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576 --Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT --{'execute': 'qmp_capabilities'} --{"return": {}} -- -+Creating bottom <- middle <- top backing file chain... -+Starting VM... 
- === Testing drive-backup === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'drive-backup', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'target': 'TEST_DIR/o.IMGFMT', --'format': 'IMGFMT', --'sync': 'none'}} --Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing drive-mirror === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'drive-mirror', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'target': 'TEST_DIR/o.IMGFMT', --'format': 'IMGFMT', --'sync': 'none'}} --Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": 
"TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing active block-commit === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 
'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-commit', --'arguments': {'job-id': 'job0', 'device': 'drv0'}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, 
"speed": 0, "type": "commit"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing non-active block-commit === -- --wrote 1048576/1048576 bytes at offset 0 --1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-commit', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'top': 'TEST_DIR/m.IMGFMT', --'speed': 1}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} --{'execute': 'block-job-cancel', -- 'arguments': 
{'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing block-stream === -- --wrote 1048576/1048576 bytes at offset 0 --1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-stream', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'speed': 1}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} 
-+{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} --*** done -+ --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch index 9d47aab..5640a7e 100644 --- a/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch +++ b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch @@ -1,17 +1,17 @@ -From 4296c41178438f9dffa16c538b0c1a2e28944f4c Mon Sep 17 00:00:00 2001 +From 88adaeaecb1d7753aa8ac3da40f617d93eaf8bdc Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 17 May 2024 21:50:15 -0500 -Subject: [PATCH 4/4] iotests: test NBD+TLS+iothread +Subject: [PATCH 2/4] iotests: test NBD+TLS+iothread 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 375: Fix regression on nbd+tls -RH-Jira: RHEL-33754 +RH-MergeRequest: 244: qio: Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 RH-Acked-by: Kevin Wolf -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] 5905c09466f4e65f3ff9973b41f42cbe1d75363c (ebblake/qemu-kvm) +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] 29c3128a158b7c49fa79c141a9adcc12693f7de4 (ebblake/centos-qemu-kvm) Prevent regressions when using NBD with TLS in the presence of iothreads, adding coverage the fix to qio channels made in the @@ -27,7 +27,7 @@ Signed-off-by: Eric Blake Message-ID: <20240531180639.1392905-6-eblake@redhat.com> Reviewed-by: Daniel P. Berrangé (cherry picked from commit a73c99378022ebb785481e84cfe1e81097546268) -Jira: https://issues.redhat.com/browse/RHEL-33754 +Jira: https://issues.redhat.com/browse/RHEL-33440 Signed-off-by: Eric Blake --- tests/qemu-iotests/tests/nbd-tls-iothread | 168 ++++++++++++++++++ diff --git a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch deleted file mode 100644 index fc1c62f..0000000 --- a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:10 -0500 -Subject: [PATCH 093/101] job: remove outdated AioContext locking comments - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer exists. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-14-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/qemu/job.h | 20 -------------------- - 1 file changed, 20 deletions(-) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index e502787dd8..9ea98b5927 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -67,8 +67,6 @@ typedef struct Job { - - /** - * The completion function that will be called when the job completes. -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - BlockCompletionFunc *cb; - -@@ -264,9 +262,6 @@ struct JobDriver { - * - * This callback will not be invoked if the job has already failed. - * If it fails, abort and then clean will be called. -- * -- * Called with AioContext lock held, since many callbacs implementations -- * use bdrv_* functions that require to hold the lock. - */ - int (*prepare)(Job *job); - -@@ -277,9 +272,6 @@ struct JobDriver { - * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*commit)(Job *job); - -@@ -290,9 +282,6 @@ struct JobDriver { - * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*abort)(Job *job); - -@@ -301,9 +290,6 @@ struct JobDriver { - * .commit() or .abort(). Regardless of which callback is invoked after - * completion, .clean() will always be called, even if the job does not - * belong to a transaction group. -- * -- * Called with AioContext lock held, since many callbacs implementations -- * use bdrv_* functions that require to hold the lock. 
- */ - void (*clean)(Job *job); - -@@ -318,17 +304,12 @@ struct JobDriver { - * READY). - * (If the callback is NULL, the job is assumed to terminate - * without I/O.) -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - bool (*cancel)(Job *job, bool force); - - - /** - * Called when the job is freed. -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*free)(Job *job); - }; -@@ -424,7 +405,6 @@ void job_ref_locked(Job *job); - * Release a reference that was previously acquired with job_ref_locked() or - * job_create(). If it's the last reference to the object, it will be freed. - * -- * Takes AioContext lock internally to invoke a job->driver callback. - * Called with job lock held. - */ - void job_unref_locked(Job *job); --- -2.39.3 - diff --git a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch deleted file mode 100644 index 3e562b8..0000000 --- a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:19 +0800 -Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 
73e2df669335047b542b67d37ade060a6ae40dd8) -Signed-off-by: Eric Auger ---- - hw/s390x/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig -index 4c068d7960..26ad104485 100644 ---- a/hw/s390x/Kconfig -+++ b/hw/s390x/Kconfig -@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO - imply VFIO_CCW - imply WDT_DIAG288 - imply PCIE_DEVICES -+ imply IOMMUFD - select PCI_EXPRESS - select S390_FLIC - select S390_FLIC_KVM if KVM --- -2.39.3 - diff --git a/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch new file mode 100644 index 0000000..10e98a7 --- /dev/null +++ b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch @@ -0,0 +1,153 @@ +From 120157257ac239050779fdddc9abb56bd39958b3 Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:05 -0500 +Subject: [PATCH 029/100] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [29/91] 9a08c8699f632cd046a6307e33bd053a7cc7db46 (bonzini/rhel-qemu-kvm) + +Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. + +With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that +backend'ed both by hva-based shared memory and guest memfd based private +memory. 
+ +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-10-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ce5a983233b4ca94ced88c9581014346509b5c71) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- + accel/kvm/trace-events | 2 +- + include/sysemu/kvm_int.h | 2 ++ + 3 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a7b9a127dd..5ef55e4dd7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, + static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) + { + KVMState *s = kvm_state; +- struct kvm_userspace_memory_region mem; ++ struct kvm_userspace_memory_region2 mem; + int ret; + + mem.slot = slot->slot | (kml->as_id << 16); + mem.guest_phys_addr = slot->start_addr; + mem.userspace_addr = (unsigned long)slot->ram; + mem.flags = slot->flags; ++ mem.guest_memfd = slot->guest_memfd; ++ mem.guest_memfd_offset = slot->guest_memfd_offset; + + if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { + /* Set the slot size to 0 before setting the slot to the desired + * value. This is needed based on KVM commit 75d61fbc. 
*/ + mem.memory_size = 0; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + if (ret < 0) { + goto err; + } + } + mem.memory_size = slot->memory_size; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + slot->old_flags = mem.flags; + err: + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, +- mem.userspace_addr, ret); ++ mem.userspace_addr, mem.guest_memfd, ++ mem.guest_memfd_offset, ret); + if (ret < 0) { +- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," +- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +- __func__, mem.slot, slot->start_addr, +- (uint64_t)mem.memory_size, strerror(errno)); ++ if (kvm_guest_memfd_supported) { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 "," ++ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," ++ " guest_memfd_offset=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, mem.flags, ++ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, ++ strerror(errno)); ++ } else { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, strerror(errno)); ++ } + } + return ret; + } +@@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr) + if (readonly && kvm_readonly_mem_allowed) { + flags |= KVM_MEM_READONLY; + } ++ if (memory_region_has_guest_memfd(mr)) { ++ assert(kvm_guest_memfd_supported); ++ flags |= KVM_MEM_GUEST_MEMFD; ++ } + return flags; + } + +@@ -1394,6 
+1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram_start_offset = ram_start_offset; + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); ++ mem->guest_memfd = mr->ram_block->guest_memfd; ++ mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; ++ + kvm_slot_init_dirty_bitmap(mem); + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index 9f599abc17..e8c52cb9e7 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 3496be7997..a5a3fee411 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -30,6 +30,8 @@ typedef struct KVMSlot + int as_id; + /* Cache 
of the offset in ram address space */ + ram_addr_t ram_start_offset; ++ int guest_memfd; ++ hwaddr guest_memfd_offset; + } KVMSlot; + + typedef struct KVMMemoryUpdate { +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch new file mode 100644 index 0000000..1f043a9 --- /dev/null +++ b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch @@ -0,0 +1,103 @@ +From 37e6c98987bb2d4be7ce1fdda4475cd0266271c3 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:06 -0500 +Subject: [PATCH 027/100] kvm: Introduce support for memory_attributes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [27/91] 1b4428289949478f7390196ae4b098c5e6f36bb0 (bonzini/rhel-qemu-kvm) + +Introduce the helper functions to set the attributes of a range of +memory to private or shared. + +This is necessary to notify KVM the private/shared attribute of each gpa +range. KVM needs the information to decide the GPA needs to be mapped at +hva-based shared memory or guest_memfd based private memory. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-11-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0811baed49010a9b651b8029ab6b9828b09a884f) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ + include/sysemu/kvm.h | 4 ++++ + 2 files changed, 36 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 9bd235c969..272e945f52 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -91,6 +91,7 @@ bool kvm_msi_use_devid; + static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; ++static uint64_t kvm_supported_memory_attributes; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) ++{ ++ struct kvm_memory_attributes attrs; ++ int r; ++ ++ assert((attr & kvm_supported_memory_attributes) == attr); ++ attrs.attributes = attr; ++ attrs.address = start; ++ attrs.size = size; ++ attrs.flags = 0; ++ ++ r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); ++ if (r) { ++ error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " ++ "with attr 0x%" PRIx64 " error '%s'", ++ start, size, attr, strerror(errno)); ++ } ++ return r; ++} ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); ++} ++ ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, 0); ++} ++ + /* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) +@@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms) + goto err; + } 
+ ++ kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 54f4d83a37..f114ff6986 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); + * reported for the VM. + */ + bool kvm_hwpoisoned_mem(void); ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch new file mode 100644 index 0000000..97b94eb --- /dev/null +++ b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch @@ -0,0 +1,116 @@ +From 31cc494d69449811f4d995326479372da7c1241e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:35 +0100 +Subject: [PATCH 003/100] kvm: add support for guest physical bits + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [3/91] abb1ba3a584152d8efabd8255b86afe609f8ffbd (bonzini/rhel-qemu-kvm) + +Query kvm for supported guest physical address bits, in cpuid +function 80000008, eax[23:16]. Usually this is identical to host +physical address bits. With NPT or EPT being used this might be +restricted to 48 (max 4-level paging address space size) even if +the host cpu supports more physical address bits. + +When set pass this to the guest, using cpuid too. Guest firmware +can use this to figure how big the usable guest physical address +space is, so PCI bar mapping are actually reachable. 
+ +Signed-off-by: Gerd Hoffmann +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240318155336.156197-2-kraxel@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0d08c423688edcca857f88dab20f1fc56de2b281) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index b91af5051f..7ef94c681f 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -18,10 +18,32 @@ + #include "kvm_i386.h" + #include "hw/core/accel-cpu.h" + ++static void kvm_set_guest_phys_bits(CPUState *cs) ++{ ++ X86CPU *cpu = X86_CPU(cs); ++ uint32_t eax, guest_phys_bits; ++ ++ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); ++ guest_phys_bits = (eax >> 16) & 0xff; ++ if (!guest_phys_bits) { ++ return; ++ } ++ cpu->guest_phys_bits = guest_phys_bits; ++ if (cpu->guest_phys_bits > cpu->phys_bits) { ++ cpu->guest_phys_bits = cpu->phys_bits; ++ } ++ ++ if (cpu->host_phys_bits && cpu->host_phys_bits_limit && ++ cpu->guest_phys_bits > cpu->host_phys_bits_limit) { ++ cpu->guest_phys_bits = cpu->host_phys_bits_limit; ++ } ++} ++ + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + { + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; ++ bool ret; + + /* + * The realize order is important, since x86_cpu_realize() checks if +@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + * + * realize order: + * +- * x86_cpu_realize(): +- * -> x86_cpu_expand_features() +- * -> cpu_exec_realizefn(): +- * -> accel_cpu_common_realize() +- * kvm_cpu_realizefn() -> host_cpu_realizefn() +- * -> cpu_common_realizefn() +- * -> check/update ucode_rev, phys_bits, mwait ++ * x86_cpu_realizefn(): ++ * x86_cpu_expand_features() ++ * cpu_exec_realizefn(): ++ * accel_cpu_common_realize() ++ * kvm_cpu_realizefn() ++ * host_cpu_realizefn() ++ * 
kvm_set_guest_phys_bits() ++ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait ++ * cpu_common_realizefn() (via xcc->parent_realize) + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { +@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + MSR_IA32_UCODE_REV); + } + } +- return host_cpu_realizefn(cs, errp); ++ ret = host_cpu_realizefn(cs, errp); ++ if (!ret) { ++ return ret; ++ } ++ ++ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && ++ cpu->guest_phys_bits == -1) { ++ kvm_set_guest_phys_bits(cs); ++ } ++ ++ return true; + } + + static bool lmce_supported(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch b/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch deleted file mode 100644 index 5116b96..0000000 --- a/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 8a8fa4ab4dc05502550ca207926cd0c93a3341ea Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 8 Apr 2024 12:43:49 +0200 -Subject: [PATCH] kvm: error out of kvm_irqchip_add_msi_route() in case of full - route table - -RH-Author: Igor Mammedov -RH-MergeRequest: 374: kvm: error out of kvm_irqchip_add_msi_route() in case of full route table -RH-Jira: RHEL-32990 -RH-Acked-by: Ani Sinha -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] df31f2d0cafe10a1ac22a2bebb85dc17c1e891e0 - -RH-Jira: RHEL-32990 - -subj is calling kvm_add_routing_entry() which simply extends - KVMState::irq_routes::entries[] -but doesn't check if number of routes goes beyond limit the kernel -is willing to accept. 
Which later leads toi the assert - - qemu-kvm: ../accel/kvm/kvm-all.c:1833: kvm_irqchip_commit_routes: Assertion `ret == 0' failed - -typically it happens during guest boot for large enough guest - -Reproduced with: - ./qemu --enable-kvm -m 8G -smp 64 -machine pc \ - `for b in {1..2}; do echo -n "-device pci-bridge,id=pci$b,chassis_nr=$b "; - for i in {0..31}; do touch /tmp/vblk$b$i; - echo -n "-drive file=/tmp/vblk$b$i,if=none,id=drive$b$i,format=raw - -device virtio-blk-pci,drive=drive$b$i,bus=pci$b "; - done; done` - -While crash at boot time is bad, the same might happen at hotplug time -which is unacceptable. -So instead calling kvm_add_routing_entry() unconditionally, check first -that number of routes won't exceed KVM_CAP_IRQ_ROUTING. This way virtio -device insteads killin qemu, will gracefully fail to initialize device -as expected with following warnings on console: - virtio-blk failed to set guest notifier (-28), ensure -accel kvm is set. - virtio_bus_start_ioeventfd: failed. Fallback to userspace (slower). - -Signed-off-by: Igor Mammedov ---- - accel/kvm/kvm-all.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index e39a810a4e..f1a4564cbd 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2000,12 +2000,17 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) - return -EINVAL; - } - -- trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", -- vector, virq); -+ if (s->irq_routes->nr < s->gsi_count) { -+ trace_kvm_irqchip_add_msi_route(dev ? 
dev->name : (char *)"N/A", -+ vector, virq); - -- kvm_add_routing_entry(s, &kroute); -- kvm_arch_add_msi_route_post(&kroute, vector, dev); -- c->changes++; -+ kvm_add_routing_entry(s, &kroute); -+ kvm_arch_add_msi_route_post(&kroute, vector, dev); -+ c->changes++; -+ } else { -+ kvm_irqchip_release_virq(s, virq); -+ return -ENOSPC; -+ } - - return virq; - } --- -2.39.3 - diff --git a/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch new file mode 100644 index 0000000..9baa06f --- /dev/null +++ b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch @@ -0,0 +1,194 @@ +From 33cc1b469689ee2bb7c4f745189472c74a0a98ab Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:08 -0500 +Subject: [PATCH 034/100] kvm: handle KVM_EXIT_MEMORY_FAULT + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [34/91] 59c672f6b19a3afcb61878775eb6425c6fdea6d5 (bonzini/rhel-qemu-kvm) + +Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory +conversion on the RAMBlock to turn the memory into desired attribute, +switching between private and shared. + +Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when +KVM_EXIT_MEMORY_FAULT happens. + +Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has +guest_memfd memory backend. + +Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is +added. + +When page is converted from shared to private, the original shared +memory can be discarded via ram_block_discard_range(). Note, shared +memory can be discarded only when it's not back'ed by hugetlb because +hugetlb is supposed to be pre-allocated and no need for discarding. 
+ +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li + +Message-ID: <20240320083945.991426-13-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c15e5684071d93174e446be318f49d8d59b15d6d) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++----- + accel/kvm/trace-events | 2 + + include/sysemu/kvm.h | 2 + + 3 files changed, 92 insertions(+), 10 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 3f99efc8cc..09164e346c 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu) + } while (sigismember(&chkset, SIG_IPI)); + } + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) ++{ ++ MemoryRegionSection section; ++ ram_addr_t offset; ++ MemoryRegion *mr; ++ RAMBlock *rb; ++ void *addr; ++ int ret = -1; ++ ++ trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); ++ ++ if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || ++ !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { ++ return -1; ++ } ++ ++ if (!size) { ++ return -1; ++ } ++ ++ section = memory_region_find(get_system_memory(), start, size); ++ mr = section.mr; ++ if (!mr) { ++ return -1; ++ } ++ ++ if (!memory_region_has_guest_memfd(mr)) { ++ error_report("Converting non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ goto out_unref; ++ } ++ ++ if (to_private) { ++ ret = kvm_set_memory_attributes_private(start, size); ++ } else { ++ ret = kvm_set_memory_attributes_shared(start, size); ++ } ++ if (ret) { ++ goto out_unref; ++ } ++ ++ addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; ++ rb = qemu_ram_block_from_host(addr, false, &offset); ++ ++ if (to_private) { ++ if (rb->page_size != qemu_real_host_page_size()) { ++ /* ++ * shared memory is backed by hugetlb, which is supposed to be ++ * pre-allocated and doesn't need to be discarded ++ */ ++ goto out_unref; ++ } ++ ret = ram_block_discard_range(rb, offset, size); ++ } else { ++ ret = ram_block_discard_guest_memfd_range(rb, offset, size); ++ } ++ ++out_unref: ++ memory_region_unref(mr); ++ return ret; ++} ++ + int kvm_cpu_exec(CPUState *cpu) + { + struct kvm_run *run = cpu->kvm_run; +@@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu) + ret = EXCP_INTERRUPT; + break; + } +- fprintf(stderr, "error: kvm run failed %s\n", +- strerror(-run_ret)); ++ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { ++ fprintf(stderr, "error: kvm run failed %s\n", ++ strerror(-run_ret)); + #ifdef TARGET_PPC +- if (run_ret == -EBUSY) { +- fprintf(stderr, +- "This is probably because your SMT is enabled.\n" +- "VCPU can only run on primary threads with all " +- "secondary threads offline.\n"); +- } ++ if (run_ret == -EBUSY) { ++ fprintf(stderr, ++ "This is probably because your SMT is enabled.\n" ++ "VCPU can only run on primary threads with all " ++ "secondary threads offline.\n"); ++ } + #endif +- ret = -1; +- break; ++ ret = -1; ++ break; ++ } + } + + trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); +@@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu) + break; + } + break; ++ case KVM_EXIT_MEMORY_FAULT: ++ trace_kvm_memory_fault(run->memory_fault.gpa, ++ run->memory_fault.size, ++ run->memory_fault.flags); ++ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) 
{ ++ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, ++ (uint64_t)run->memory_fault.flags); ++ ret = -1; ++ break; ++ } ++ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, ++ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); ++ break; + default: + ret = kvm_arch_handle_exit(cpu, run); + break; +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index e8c52cb9e7..681ccb667d 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -31,3 +31,5 @@ kvm_cpu_exec(void) "" + kvm_interrupt_exit_request(void) "" + kvm_io_window_exit(void) "" + kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 ++kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" ++kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 9e4ab7ae89..74f23dff9c 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch new file mode 100644 index 0000000..8f9756b --- /dev/null +++ b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch @@ -0,0 +1,56 @@ +From f9dc55dd179bb534d589af371c5c2a7886bd461e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:11 -0500 +Subject: [PATCH 030/100] kvm/memory: Make memory type private by default if it + has guest memfd backend + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [30/91] 5e21edf844b5629ee32c4075843b028561b97ae2 (bonzini/rhel-qemu-kvm) + +KVM side leaves the memory to shared by default, which may incur the +overhead of paging conversion on the first visit of each page. Because +the expectation is that page is likely to private for the VMs that +require private memory (has guest memfd). + +Explicitly set the memory to private when memory region has valid +guest memfd backend. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-16-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5ef55e4dd7..3f99efc8cc 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + strerror(-err)); + abort(); + } ++ ++ if (memory_region_has_guest_memfd(mr)) { ++ err = kvm_set_memory_attributes_private(start_addr, slot_size); ++ if (err) { ++ error_report("%s: failed to set memory attribute private: %s", ++ __func__, strerror(-err)); ++ exit(1); ++ } ++ } ++ + start_addr += slot_size; + ram_start_offset += slot_size; + ram += slot_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch new file mode 100644 index 0000000..7b578b5 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch @@ -0,0 +1,61 @@ +From aeaa7061139202448d466b7e18682081f9cd2097 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:54 -0500 +Subject: [PATCH 035/100] kvm/tdx: Don't 
complain when converting vMMIO region + to shared + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [35/91] c42870771d7af5badc2e10d42be9b5620d72f95d (bonzini/rhel-qemu-kvm) + +Because vMMIO region needs to be shared region, guest TD may explicitly +convert such region from private to shared. Don't complain such +conversion. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c5d9425ef4da9f43fc0903905ad415456d1ab843) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 09164e346c..6efaff90a7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + } + + if (!memory_region_has_guest_memfd(mr)) { +- error_report("Converting non guest_memfd backed memory region " +- "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", +- start, size, to_private ? "private" : "shared"); ++ /* ++ * Because vMMIO region must be shared, guest TD may convert vMMIO ++ * region to shared explicitly. Don't complain such case. See ++ * memory_region_type() for checking if the region is MMIO region. ++ */ ++ if (!to_private && ++ !memory_region_is_ram(mr) && ++ !memory_region_is_ram_device(mr) && ++ !memory_region_is_rom(mr) && ++ !memory_region_is_romd(mr)) { ++ ret = 0; ++ } else { ++ error_report("Convert non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ } + goto out_unref; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch new file mode 100644 index 0000000..c0f2bc6 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch @@ -0,0 +1,62 @@ +From 2b2dfff3e383c99d0f759a8c12659d1a0ce50e8e Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:55 -0500 +Subject: [PATCH 036/100] kvm/tdx: Ignore memory conversion to shared of + unassigned region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [36/91] 84515b9dcfc2e07b272bb2477acf6430e9d33f28 (bonzini/rhel-qemu-kvm) + +TDX requires vMMIO region to be shared. For KVM, MMIO region is the region +which kvm memslot isn't assigned to (except in-kernel emulation). +qemu has the memory region for vMMIO at each device level. + +While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO +region, qemu doesn't find corresponding vMMIO region because it's before +PCI device allocation and memory_region_find() finds the device region, not +PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest +accesses those region they use GPA with shared bit set for vMMIO. Ignore +memory conversion request of non-assigned region to shared and return +success. Otherwise OVMF is confused and panics there. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 565f4768bb9cf840b2f8cca41483bb91aa3196a3) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6efaff90a7..f6268855b4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { ++ /* ++ * Ignore converting non-assigned region to shared. ++ * ++ * TDX requires vMMIO region to be shared to inject #VE to guest. ++ * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region, ++ * and vIO-APIC 0xFEC00000 4K page. ++ * OVMF assigns 32bit PCI MMIO region to ++ * [top of low memory: typically 2GB=0xC000000, 0xFC00000) ++ */ ++ if (!to_private) { ++ return 0; ++ } + return -1; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch index 391e618..1aba040 100644 --- a/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch +++ b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch @@ -1,13 +1,14 @@ -From 9d00965910d5a7818ffe55b18d3e9dd4c7210e1b Mon Sep 17 00:00:00 2001 +From 287ebf9f0b8a62dc49fd7802472c1ae57f653e44 Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Thu, 25 Apr 2024 12:34:12 +0530 -Subject: [PATCH 4/4] linux-aio: add IO_CMD_FDSYNC command support +Subject: [PATCH 1/5] linux-aio: add IO_CMD_FDSYNC command support RH-Author: Prasad Pandit -RH-MergeRequest: 378: linux-aio: add IO_CMD_FDSYNC command support -RH-Jira: RHEL-43261 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 3b80d4a162aad1a87322078a7d7b060a9496035b +RH-MergeRequest: 249: linux-aio: add 
IO_CMD_FDSYNC command support +RH-Jira: RHEL-42411 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 9beff6506d2eca7741b1c11b5acdc19b635c7c75 (pjp/cs-qemu-kvm) Libaio defines IO_CMD_FDSYNC command to sync all outstanding asynchronous I/O operations, by flushing out file data to the @@ -18,7 +19,7 @@ pthreads, and destroying these pthreads results in TLB flushes. In a real-time guest environment, TLB flushes cause a latency spike. This patch helps to avoid such spikes. -Jira: https://issues.redhat.com/browse/RHEL-43261 +Jira: https://issues.redhat.com/browse/RHEL-42411 Reviewed-by: Stefan Hajnoczi Signed-off-by: Prasad Pandit Message-ID: <20240425070412.37248-1-ppandit@redhat.com> @@ -110,7 +111,7 @@ index ec05d946f3..e3b5ec9aba 100644 + return (ret == -EINVAL) ? false : true; +} diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h -index 0f63c2800c..3166903a56 100644 +index 20e000b8ef..626706827f 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -60,6 +60,7 @@ void laio_cleanup(LinuxAioState *s); diff --git a/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch new file mode 100644 index 0000000..2fd35fd --- /dev/null +++ b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch @@ -0,0 +1,189 @@ +From c3e2bc3319882c16fa36eafc7a613073746cfc8b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:14 -0500 +Subject: [PATCH 052/100] linux-headers: Update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [52/91] df77e867072f60110b8387a54ba2db6226b35007 (bonzini/rhel-qemu-kvm) + +This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module +parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will +be included in Linux 6.11, to be released next July. 
+ +Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4. + +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5f69e42da5b40a2213f4db70ca461f554abca686) +Signed-off-by: Paolo Bonzini +--- + linux-headers/asm-loongarch/kvm.h | 4 +++ + linux-headers/asm-riscv/kvm.h | 1 + + linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++- + linux-headers/linux/vhost.h | 15 ++++----- + 4 files changed, 64 insertions(+), 8 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 109785922c..f9abef3823 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -17,6 +17,8 @@ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 ++ + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -72,6 +74,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index b1c503c295..e878e7cc39 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, ++ KVM_RISCV_ISA_EXT_SSCOFPMF, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 31c95c2dfe..1c8f918234 100644 +--- 
a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -695,6 +695,11 @@ enum sev_cmd_id { + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + ++ /* SNP-specific commands */ ++ KVM_SEV_SNP_LAUNCH_START = 100, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ KVM_SEV_SNP_LAUNCH_FINISH, ++ + KVM_SEV_NR_MAX, + }; + +@@ -709,7 +714,9 @@ struct kvm_sev_cmd { + struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; +- __u32 pad[9]; ++ __u16 ghcb_version; ++ __u16 pad1; ++ __u32 pad2[8]; + }; + + struct kvm_sev_launch_start { +@@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data { + __u32 pad2; + }; + ++struct kvm_sev_snp_launch_start { ++ __u64 policy; ++ __u8 gosvw[16]; ++ __u16 flags; ++ __u8 pad0[6]; ++ __u64 pad1[4]; ++}; ++ ++/* Kept in sync with firmware values for simplicity. */ ++#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 ++#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 ++#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 ++#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 ++#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 ++ ++struct kvm_sev_snp_launch_update { ++ __u64 gfn_start; ++ __u64 uaddr; ++ __u64 len; ++ __u8 type; ++ __u8 pad0; ++ __u16 flags; ++ __u32 pad1; ++ __u64 pad2[4]; ++}; ++ ++#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 ++#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 ++#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 ++ ++struct kvm_sev_snp_launch_finish { ++ __u64 id_block_uaddr; ++ __u64 id_auth_uaddr; ++ __u8 id_block_en; ++ __u8 auth_key_en; ++ __u8 vcek_disabled; ++ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; ++ __u8 pad0[3]; ++ __u16 flags; ++ __u64 pad1[4]; ++}; ++ + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +@@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd { + #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 ++#define KVM_X86_SNP_VM 4 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h 
+index bea6973906..b95dd84eef 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -179,12 +179,6 @@ + /* Get the config size */ + #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +-/* Get the count of all virtqueues */ +-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +- +-/* Get the number of virtqueue groups. */ +-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +- + /* Get the number of address spaces. */ + #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +@@ -228,10 +222,17 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++ ++/* Get the count of all virtqueues */ ++#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) ++ ++/* Get the number of virtqueue groups. */ ++#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) ++ + /* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch new file mode 100644 index 0000000..4c3dd73 --- /dev/null +++ b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch @@ -0,0 +1,2471 @@ +From 530296e1669c9730f261a269d5b911ea56dfcce7 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 23 Apr 2024 11:46:47 +0200 +Subject: [PATCH 017/100] linux-headers: update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [17/91] 5660d4967f10a84802de16c24540e95095eaffd5 (bonzini/rhel-qemu-kvm) + 
+Signed-off-by: Paolo Bonzini +(cherry picked from commit ab0c7fb22b56523f24d6e127cd4d10ecff67bf85) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 - + include/standard-headers/asm-x86/bootparam.h | 17 +- + include/standard-headers/asm-x86/kvm_para.h | 3 +- + include/standard-headers/asm-x86/setup_data.h | 83 +++ + include/standard-headers/linux/ethtool.h | 48 ++ + include/standard-headers/linux/fuse.h | 39 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_gpu.h | 2 + + include/standard-headers/linux/virtio_pci.h | 10 +- + include/standard-headers/linux/virtio_snd.h | 154 ++++ + linux-headers/asm-arm64/kvm.h | 15 +- + linux-headers/asm-arm64/sve_context.h | 11 + + linux-headers/asm-generic/bitsperlong.h | 4 + + linux-headers/asm-loongarch/kvm.h | 2 - + linux-headers/asm-mips/kvm.h | 2 - + linux-headers/asm-powerpc/kvm.h | 45 +- + linux-headers/asm-riscv/kvm.h | 3 +- + linux-headers/asm-s390/kvm.h | 315 +++++++- + linux-headers/asm-x86/kvm.h | 328 ++++++++- + linux-headers/linux/bits.h | 15 + + linux-headers/linux/kvm.h | 689 +----------------- + linux-headers/linux/psp-sev.h | 59 ++ + linux-headers/linux/vhost.h | 7 + + 23 files changed, 1120 insertions(+), 740 deletions(-) + create mode 100644 include/standard-headers/asm-x86/setup_data.h + create mode 100644 linux-headers/linux/bits.h + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index ffbda48917..84a4801977 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -679,14 +679,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) + return dev; + } + +-struct setup_data { +- uint64_t next; +- uint32_t type; +- uint32_t len; +- uint8_t data[]; +-} __attribute__((packed)); +- +- + /* + * The entry point into the kernel for PVH boot is different from + * the native entry point. 
The PVH entry is defined by the x86/HVM +diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h +index 0b06d2bff1..b582a105c0 100644 +--- a/include/standard-headers/asm-x86/bootparam.h ++++ b/include/standard-headers/asm-x86/bootparam.h +@@ -2,21 +2,7 @@ + #ifndef _ASM_X86_BOOTPARAM_H + #define _ASM_X86_BOOTPARAM_H + +-/* setup_data/setup_indirect types */ +-#define SETUP_NONE 0 +-#define SETUP_E820_EXT 1 +-#define SETUP_DTB 2 +-#define SETUP_PCI 3 +-#define SETUP_EFI 4 +-#define SETUP_APPLE_PROPERTIES 5 +-#define SETUP_JAILHOUSE 6 +-#define SETUP_CC_BLOB 7 +-#define SETUP_IMA 8 +-#define SETUP_RNG_SEED 9 +-#define SETUP_ENUM_MAX SETUP_RNG_SEED +- +-#define SETUP_INDIRECT (1<<31) +-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++#include "standard-headers/asm-x86/setup_data.h" + + /* ram_size flags */ + #define RAMDISK_IMAGE_START_MASK 0x07FF +@@ -38,6 +24,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + + #endif /* _ASM_X86_BOOTPARAM_H */ +diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h +index f0235e58a1..9a011d20f0 100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -92,7 +92,7 @@ struct kvm_clock_pairing { + #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + + /* MSR_KVM_ASYNC_PF_INT */ +-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) ++#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + + /* MSR_KVM_MIGRATION_CONTROL */ + #define KVM_MIGRATION_READY (1 << 0) +@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { + uint32_t token; + + uint8_t pad[56]; +- uint32_t enabled; + }; + + #define KVM_PV_EOI_BIT 0 +diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h +new file mode 100644 +index 0000000000..09355f54c5 +--- /dev/null ++++ 
b/include/standard-headers/asm-x86/setup_data.h +@@ -0,0 +1,83 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _ASM_X86_SETUP_DATA_H ++#define _ASM_X86_SETUP_DATA_H ++ ++/* setup_data/setup_indirect types */ ++#define SETUP_NONE 0 ++#define SETUP_E820_EXT 1 ++#define SETUP_DTB 2 ++#define SETUP_PCI 3 ++#define SETUP_EFI 4 ++#define SETUP_APPLE_PROPERTIES 5 ++#define SETUP_JAILHOUSE 6 ++#define SETUP_CC_BLOB 7 ++#define SETUP_IMA 8 ++#define SETUP_RNG_SEED 9 ++#define SETUP_ENUM_MAX SETUP_RNG_SEED ++ ++#define SETUP_INDIRECT (1<<31) ++#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include "standard-headers/linux/types.h" ++ ++/* extensible setup data list node */ ++struct setup_data { ++ uint64_t next; ++ uint32_t type; ++ uint32_t len; ++ uint8_t data[]; ++}; ++ ++/* extensible setup indirect data node */ ++struct setup_indirect { ++ uint32_t type; ++ uint32_t reserved; /* Reserved, must be set to zero. */ ++ uint64_t len; ++ uint64_t addr; ++}; ++ ++/* ++ * The E820 memory region entry of the boot protocol ABI: ++ */ ++struct boot_e820_entry { ++ uint64_t addr; ++ uint64_t size; ++ uint32_t type; ++} QEMU_PACKED; ++ ++/* ++ * The boot loader is passing platform information via this Jailhouse-specific ++ * setup data structure. 
++ */ ++struct jailhouse_setup_data { ++ struct { ++ uint16_t version; ++ uint16_t compatible_version; ++ } QEMU_PACKED hdr; ++ struct { ++ uint16_t pm_timer_address; ++ uint16_t num_cpus; ++ uint64_t pci_mmconfig_base; ++ uint32_t tsc_khz; ++ uint32_t apic_khz; ++ uint8_t standard_ioapic; ++ uint8_t cpu_ids[255]; ++ } QEMU_PACKED v1; ++ struct { ++ uint32_t flags; ++ } QEMU_PACKED v2; ++} QEMU_PACKED; ++ ++/* ++ * IMA buffer setup data information from the previous kernel during kexec ++ */ ++struct ima_setup_data { ++ uint64_t addr; ++ uint64_t size; ++} QEMU_PACKED; ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_SETUP_DATA_H */ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index dfb54eff6f..01503784d2 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define IPV4_FLOW 0x10 /* hash only */ + #define IPV6_FLOW 0x11 /* hash only */ + #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ ++ ++/* Used for GTP-U IPv4 and IPv6. ++ * The format of GTP packets only includes ++ * elements such as TEID and GTP version. ++ * It is primarily intended for data communication of the UE. ++ */ ++#define GTPU_V4_FLOW 0x13 /* hash only */ ++#define GTPU_V6_FLOW 0x14 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * The format of these GTP packets does not include TEID. ++ * Primarily expected to be used for communication ++ * to create sessions for UE data communication, ++ * commonly referred to as CSR (Create Session Request). ++ */ ++#define GTPC_V4_FLOW 0x15 /* hash only */ ++#define GTPC_V6_FLOW 0x16 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. ++ * After session creation, it becomes this packet. ++ * This is mainly used for requests to realize UE handover. 
++ */ ++#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ ++#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ ++ ++/* Use for GTP-U and extended headers for the PSC (PDU Session Container). ++ * The format of these GTP packets includes TEID and QFI. ++ * In 5G communication using UPF (User Plane Function), ++ * data communication with this extended header is performed. ++ */ ++#define GTPU_EH_V4_FLOW 0x19 /* hash only */ ++#define GTPU_EH_V6_FLOW 0x1a /* hash only */ ++ ++/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. ++ * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by ++ * UL/DL included in the PSC. ++ * There are differences in the data included based on Downlink/Uplink, ++ * and can be used to distinguish packets. ++ * The functions described so far are useful when you want to ++ * handle communication from the mobile network in UPF, PGW, etc. ++ */ ++#define GTPU_UL_V4_FLOW 0x1b /* hash only */ ++#define GTPU_UL_V6_FLOW 0x1c /* hash only */ ++#define GTPU_DL_V4_FLOW 0x1d /* hash only */ ++#define GTPU_DL_V6_FLOW 0x1e /* hash only */ ++ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ + #define FLOW_EXT 0x80000000 + #define FLOW_MAC_EXT 0x40000000 +@@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define RXH_IP_DST (1 << 5) + #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ + #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ ++#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ + #define RXH_DISCARD (1 << 31) + + #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index fc0dcd10ae..bac9dbc49f 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -211,6 +211,12 @@ + * 7.39 + * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures ++ * ++ * 7.40 ++ * - add 
max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag ++ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag ++ * - add FUSE_NO_EXPORT_SUPPORT init flag ++ * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + */ + + #ifndef _LINUX_FUSE_H +@@ -242,7 +248,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 39 ++#define FUSE_KERNEL_MINOR_VERSION 40 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -349,6 +355,7 @@ struct fuse_file_lock { + * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode ++ * FOPEN_PASSTHROUGH: passthrough read/write io for this open file + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define FOPEN_KEEP_CACHE (1 << 1) +@@ -357,6 +364,7 @@ struct fuse_file_lock { + #define FOPEN_STREAM (1 << 4) + #define FOPEN_NOFLUSH (1 << 5) + #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) ++#define FOPEN_PASSTHROUGH (1 << 7) + + /** + * INIT request/reply flags +@@ -406,6 +414,9 @@ struct fuse_file_lock { + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. 
++ * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support ++ * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit ++ * of the request ID indicates resend requests + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,6 +456,9 @@ struct fuse_file_lock { + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) + #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++#define FUSE_PASSTHROUGH (1ULL << 37) ++#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) ++#define FUSE_HAS_RESEND (1ULL << 39) + + /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ + #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +@@ -631,6 +645,7 @@ enum fuse_notify_code { + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_CODE_MAX, + }; + +@@ -757,7 +772,7 @@ struct fuse_create_in { + struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; +- uint32_t padding; ++ int32_t backing_id; + }; + + struct fuse_release_in { +@@ -873,7 +888,8 @@ struct fuse_init_out { + uint16_t max_pages; + uint16_t map_alignment; + uint32_t flags2; +- uint32_t unused[7]; ++ uint32_t max_stack_depth; ++ uint32_t unused[6]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -956,6 +972,14 @@ struct fuse_fallocate_in { + uint32_t padding; + }; + ++/** ++ * FUSE request unique ID flag ++ * ++ * Indicates whether this is a resend request. The receiver should handle this ++ * request accordingly. 
++ */ ++#define FUSE_UNIQUE_RESEND (1ULL << 63) ++ + struct fuse_in_header { + uint32_t len; + uint32_t opcode; +@@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { + uint64_t dummy4; + }; + ++struct fuse_backing_map { ++ int32_t fd; ++ uint32_t flags; ++ uint64_t padding; ++}; ++ + /* Device ioctls: */ + #define FUSE_DEV_IOC_MAGIC 229 + #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) ++#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ ++ struct fuse_backing_map) ++#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) + + struct fuse_lseek_in { + uint64_t fh; +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index f6bab08540..2221b0c383 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -602,6 +602,7 @@ + + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ + #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ ++#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ + + #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ + #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 2da48d3d4c..2db643ed8f 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { + + #define VIRTIO_GPU_CAPSET_VIRGL 1 + #define VIRTIO_GPU_CAPSET_VIRGL2 2 ++/* 3 is reserved for gfxstream */ ++#define VIRTIO_GPU_CAPSET_VENUS 4 + + /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ + struct virtio_gpu_get_capset_info { +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index 3e2bc2c97e..4010216103 100644 +--- a/include/standard-headers/linux/virtio_pci.h ++++ 
b/include/standard-headers/linux/virtio_pci.h +@@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { + #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 + #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +-struct QEMU_PACKED virtio_admin_cmd_hdr { ++struct virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV +@@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { + uint64_t group_member_id; + }; + +-struct QEMU_PACKED virtio_admin_cmd_status { ++struct virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { ++struct virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { ++struct virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. 
*/ + }; + +@@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { + + #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +-struct QEMU_PACKED virtio_admin_cmd_notify_info_data { ++struct virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; +diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h +index 1af96b9fc6..860f12e0a4 100644 +--- a/include/standard-headers/linux/virtio_snd.h ++++ b/include/standard-headers/linux/virtio_snd.h +@@ -7,6 +7,14 @@ + + #include "standard-headers/linux/virtio_types.h" + ++/******************************************************************************* ++ * FEATURE BITS ++ */ ++enum { ++ /* device supports control elements */ ++ VIRTIO_SND_F_CTLS = 0 ++}; ++ + /******************************************************************************* + * CONFIGURATION SPACE + */ +@@ -17,6 +25,8 @@ struct virtio_snd_config { + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; ++ /* # of available control elements */ ++ uint32_t controls; + }; + + enum { +@@ -55,6 +65,15 @@ enum { + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + ++ /* control element request types */ ++ VIRTIO_SND_R_CTL_INFO = 0x0300, ++ VIRTIO_SND_R_CTL_ENUM_ITEMS, ++ VIRTIO_SND_R_CTL_READ, ++ VIRTIO_SND_R_CTL_WRITE, ++ VIRTIO_SND_R_CTL_TLV_READ, ++ VIRTIO_SND_R_CTL_TLV_WRITE, ++ VIRTIO_SND_R_CTL_TLV_COMMAND, ++ + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, +@@ -63,6 +82,9 @@ enum { + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + ++ /* control element event types */ ++ VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, ++ + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, +@@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { + 
uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; + }; + ++/******************************************************************************* ++ * CONTROL ELEMENTS MESSAGES ++ */ ++struct virtio_snd_ctl_hdr { ++ /* VIRTIO_SND_R_CTL_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... virtio_snd_config::controls - 1 */ ++ uint32_t control_id; ++}; ++ ++/* supported roles for control elements */ ++enum { ++ VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, ++ VIRTIO_SND_CTL_ROLE_VOLUME, ++ VIRTIO_SND_CTL_ROLE_MUTE, ++ VIRTIO_SND_CTL_ROLE_GAIN ++}; ++ ++/* supported value types for control elements */ ++enum { ++ VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, ++ VIRTIO_SND_CTL_TYPE_INTEGER, ++ VIRTIO_SND_CTL_TYPE_INTEGER64, ++ VIRTIO_SND_CTL_TYPE_ENUMERATED, ++ VIRTIO_SND_CTL_TYPE_BYTES, ++ VIRTIO_SND_CTL_TYPE_IEC958 ++}; ++ ++/* supported access rights for control elements */ ++enum { ++ VIRTIO_SND_CTL_ACCESS_READ = 0, ++ VIRTIO_SND_CTL_ACCESS_WRITE, ++ VIRTIO_SND_CTL_ACCESS_VOLATILE, ++ VIRTIO_SND_CTL_ACCESS_INACTIVE, ++ VIRTIO_SND_CTL_ACCESS_TLV_READ, ++ VIRTIO_SND_CTL_ACCESS_TLV_WRITE, ++ VIRTIO_SND_CTL_ACCESS_TLV_COMMAND ++}; ++ ++struct virtio_snd_ctl_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ ++ uint32_t role; ++ /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ ++ uint32_t type; ++ /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ ++ uint32_t access; ++ /* # of members in the element value */ ++ uint32_t count; ++ /* index for an element with a non-unique name */ ++ uint32_t index; ++ /* name identifier string for the element */ ++ uint8_t name[44]; ++ /* additional information about the element's value */ ++ union { ++ /* VIRTIO_SND_CTL_TYPE_INTEGER */ ++ struct { ++ /* minimum supported value */ ++ uint32_t min; ++ /* maximum supported value */ ++ uint32_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint32_t step; ++ } integer; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ ++ struct { ++ /* minimum 
supported value */ ++ uint64_t min; ++ /* maximum supported value */ ++ uint64_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint64_t step; ++ } integer64; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ ++ struct { ++ /* # of options supported for value */ ++ uint32_t items; ++ } enumerated; ++ } value; ++}; ++ ++struct virtio_snd_ctl_enum_item { ++ /* option name */ ++ uint8_t item[64]; ++}; ++ ++struct virtio_snd_ctl_iec958 { ++ /* AES/IEC958 channel status bits */ ++ uint8_t status[24]; ++ /* AES/IEC958 subcode bits */ ++ uint8_t subcode[147]; ++ /* nothing */ ++ uint8_t pad; ++ /* AES/IEC958 subframe bits */ ++ uint8_t dig_subframe[4]; ++}; ++ ++struct virtio_snd_ctl_value { ++ union { ++ /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ ++ uint32_t integer[128]; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ ++ uint64_t integer64[64]; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ ++ uint32_t enumerated[128]; ++ /* VIRTIO_SND_CTL_TYPE_BYTES value */ ++ uint8_t bytes[512]; ++ /* VIRTIO_SND_CTL_TYPE_IEC958 value */ ++ struct virtio_snd_ctl_iec958 iec958; ++ } value; ++}; ++ ++/* supported event reason types */ ++enum { ++ /* element's value has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, ++ /* element's information has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_INFO, ++ /* element's metadata has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_TLV ++}; ++ ++struct virtio_snd_ctl_event { ++ /* VIRTIO_SND_EVT_CTL_NOTIFY */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::controls - 1 */ ++ uint16_t control_id; ++ /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ ++ uint16_t mask; ++}; ++ + #endif /* VIRTIO_SND_IF_H */ +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index c59ea55cd8..2af9931ae9 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -37,9 +37,7 @@ + #include + #include + +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + #define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -76,11 +74,11 @@ struct kvm_regs { + + /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ + #define KVM_ARM_DEVICE_TYPE_SHIFT 0 +-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ +- KVM_ARM_DEVICE_TYPE_SHIFT) ++#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ ++ KVM_ARM_DEVICE_TYPE_SHIFT) + #define KVM_ARM_DEVICE_ID_SHIFT 16 +-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ +- KVM_ARM_DEVICE_ID_SHIFT) ++#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ ++ KVM_ARM_DEVICE_ID_SHIFT) + + /* Supported device IDs */ + #define KVM_ARM_DEVICE_VGIC_V2 0 +@@ -162,6 +160,11 @@ struct kvm_sync_regs { + __u64 device_irq_level; + }; + ++/* Bits for run->s.regs.device_irq_level */ ++#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) ++#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) ++#define KVM_ARM_DEV_PMU (1 << 2) ++ + /* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. +diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h +index 1d0e3e1d09..d1b1ec8cb1 100644 +--- a/linux-headers/asm-arm64/sve_context.h ++++ b/linux-headers/asm-arm64/sve_context.h +@@ -13,6 +13,17 @@ + + #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + ++/* ++ * Yes, __SVE_VQ_MAX is 512 QUADWORDS. 
++ * ++ * To help ensure forward portability, this is much larger than the ++ * current maximum value defined by the SVE architecture. While arrays ++ * or static allocations can be sized based on this value, watch out! ++ * It will waste a surprisingly large amount of memory. ++ * ++ * Dynamic sizing based on the actual runtime vector length is likely to ++ * be preferable for most purposes. ++ */ + #define __SVE_VQ_MIN 1 + #define __SVE_VQ_MAX 512 + +diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h +index 75f320fa91..1fb4f0c9f2 100644 +--- a/linux-headers/asm-generic/bitsperlong.h ++++ b/linux-headers/asm-generic/bitsperlong.h +@@ -24,4 +24,8 @@ + #endif + #endif + ++#ifndef __BITS_PER_LONG_LONG ++#define __BITS_PER_LONG_LONG 64 ++#endif ++ + #endif /* __ASM_GENERIC_BITS_PER_LONG */ +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..109785922c 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -14,8 +14,6 @@ + * Some parts derived from the x86 version of this file. + */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + +diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h +index edcf717c43..9673dc9cb3 100644 +--- a/linux-headers/asm-mips/kvm.h ++++ b/linux-headers/asm-mips/kvm.h +@@ -20,8 +20,6 @@ + * Some parts derived from the x86 version of this file. 
+ */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + + /* +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index 9f18fa090f..1691297a76 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -28,7 +28,6 @@ + #define __KVM_HAVE_PPC_SMT + #define __KVM_HAVE_IRQCHIP + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_GUEST_DEBUG + + /* Not always available, but if it is, this is the correct offset. */ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { + #define KVM_XIVE_TIMA_PAGE_OFFSET 0 + #define KVM_XIVE_ESB_PAGE_OFFSET 4 + ++/* for KVM_PPC_GET_PVINFO */ ++ ++#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) ++ ++struct kvm_ppc_pvinfo { ++ /* out */ ++ __u32 flags; ++ __u32 hcall[4]; ++ __u8 pad[108]; ++}; ++ ++/* for KVM_PPC_GET_SMMU_INFO */ ++#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 ++ ++struct kvm_ppc_one_page_size { ++ __u32 page_shift; /* Page shift (or 0) */ ++ __u32 pte_enc; /* Encoding in the HPTE (>>12) */ ++}; ++ ++struct kvm_ppc_one_seg_page_size { ++ __u32 page_shift; /* Base page shift of segment (or 0) */ ++ __u32 slb_enc; /* SLB encoding for BookS */ ++ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 ++#define KVM_PPC_1T_SEGMENTS 0x00000002 ++#define KVM_PPC_NO_HASH 0x00000004 ++ ++struct kvm_ppc_smmu_info { ++ __u64 flags; ++ __u32 slb_size; ++ __u16 data_keys; /* # storage keys supported for data */ ++ __u16 instr_keys; /* # storage keys supported for instructions */ ++ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ ++struct kvm_ppc_resize_hpt { ++ __u64 flags; ++ __u32 shift; ++ __u32 pad; ++}; ++ + #endif /* __LINUX_KVM_POWERPC_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7499e88a94..b1c503c295 100644 +--- a/linux-headers/asm-riscv/kvm.h 
++++ b/linux-headers/asm-riscv/kvm.h +@@ -16,7 +16,6 @@ + #include + + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, ++ KVM_RISCV_ISA_EXT_ZTSO, ++ KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 023a2763a9..684c4e1205 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -12,7 +12,320 @@ + #include + + #define __KVM_S390 +-#define __KVM_HAVE_GUEST_DEBUG ++ ++struct kvm_s390_skeys { ++ __u64 start_gfn; ++ __u64 count; ++ __u64 skeydata_addr; ++ __u32 flags; ++ __u32 reserved[9]; ++}; ++ ++#define KVM_S390_CMMA_PEEK (1 << 0) ++ ++/** ++ * kvm_s390_cmma_log - Used for CMMA migration. ++ * ++ * Used both for input and output. ++ * ++ * @start_gfn: Guest page number to start from. ++ * @count: Size of the result buffer. ++ * @flags: Control operation mode via KVM_S390_CMMA_* flags ++ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty ++ * pages are still remaining. ++ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set ++ * in the PGSTE. ++ * @values: Pointer to the values buffer. ++ * ++ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
++ */ ++struct kvm_s390_cmma_log { ++ __u64 start_gfn; ++ __u32 count; ++ __u32 flags; ++ union { ++ __u64 remaining; ++ __u64 mask; ++ }; ++ __u64 values; ++}; ++ ++#define KVM_S390_RESET_POR 1 ++#define KVM_S390_RESET_CLEAR 2 ++#define KVM_S390_RESET_SUBSYSTEM 4 ++#define KVM_S390_RESET_CPU_INIT 8 ++#define KVM_S390_RESET_IPL 16 ++ ++/* for KVM_S390_MEM_OP */ ++struct kvm_s390_mem_op { ++ /* in */ ++ __u64 gaddr; /* the guest address */ ++ __u64 flags; /* flags */ ++ __u32 size; /* amount of bytes */ ++ __u32 op; /* type of operation */ ++ __u64 buf; /* buffer in userspace */ ++ union { ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_addr; /* ignored if cmpxchg flag unset */ ++ }; ++ __u32 sida_offset; /* offset into the sida */ ++ __u8 reserved[32]; /* ignored */ ++ }; ++}; ++/* types for kvm_s390_mem_op->op */ ++#define KVM_S390_MEMOP_LOGICAL_READ 0 ++#define KVM_S390_MEMOP_LOGICAL_WRITE 1 ++#define KVM_S390_MEMOP_SIDA_READ 2 ++#define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 ++#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 ++ ++/* flags for kvm_s390_mem_op->flags */ ++#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) ++#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) ++ ++/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ ++#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) ++#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) ++ ++struct kvm_s390_psw { ++ __u64 mask; ++ __u64 addr; ++}; ++ ++/* valid values for type in kvm_s390_interrupt */ ++#define KVM_S390_SIGP_STOP 0xfffe0000u ++#define KVM_S390_PROGRAM_INT 0xfffe0001u ++#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u ++#define KVM_S390_RESTART 0xfffe0003u ++#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u ++#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u ++#define 
KVM_S390_MCHK 0xfffe1000u ++#define KVM_S390_INT_CLOCK_COMP 0xffff1004u ++#define KVM_S390_INT_CPU_TIMER 0xffff1005u ++#define KVM_S390_INT_VIRTIO 0xffff2603u ++#define KVM_S390_INT_SERVICE 0xffff2401u ++#define KVM_S390_INT_EMERGENCY 0xffff1201u ++#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u ++/* Anything below 0xfffe0000u is taken by INT_IO */ ++#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ ++ (((schid)) | \ ++ ((ssid) << 16) | \ ++ ((cssid) << 18) | \ ++ ((ai) << 26)) ++#define KVM_S390_INT_IO_MIN 0x00000000u ++#define KVM_S390_INT_IO_MAX 0xfffdffffu ++#define KVM_S390_INT_IO_AI_MASK 0x04000000u ++ ++ ++struct kvm_s390_interrupt { ++ __u32 type; ++ __u32 parm; ++ __u64 parm64; ++}; ++ ++struct kvm_s390_io_info { ++ __u16 subchannel_id; ++ __u16 subchannel_nr; ++ __u32 io_int_parm; ++ __u32 io_int_word; ++}; ++ ++struct kvm_s390_ext_info { ++ __u32 ext_params; ++ __u32 pad; ++ __u64 ext_params2; ++}; ++ ++struct kvm_s390_pgm_info { ++ __u64 trans_exc_code; ++ __u64 mon_code; ++ __u64 per_address; ++ __u32 data_exc_code; ++ __u16 code; ++ __u16 mon_class_nr; ++ __u8 per_code; ++ __u8 per_atmid; ++ __u8 exc_access_id; ++ __u8 per_access_id; ++ __u8 op_access_id; ++#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 ++#define KVM_S390_PGM_FLAGS_ILC_0 0x02 ++#define KVM_S390_PGM_FLAGS_ILC_1 0x04 ++#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 ++#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 ++ __u8 flags; ++ __u8 pad[2]; ++}; ++ ++struct kvm_s390_prefix_info { ++ __u32 address; ++}; ++ ++struct kvm_s390_extcall_info { ++ __u16 code; ++}; ++ ++struct kvm_s390_emerg_info { ++ __u16 code; ++}; ++ ++#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 ++struct kvm_s390_stop_info { ++ __u32 flags; ++}; ++ ++struct kvm_s390_mchk_info { ++ __u64 cr14; ++ __u64 mcic; ++ __u64 failing_storage_address; ++ __u32 ext_damage_code; ++ __u32 pad; ++ __u8 fixed_logout[16]; ++}; ++ ++struct kvm_s390_irq { ++ __u64 type; ++ union { ++ struct kvm_s390_io_info io; ++ struct kvm_s390_ext_info ext; ++ struct 
kvm_s390_pgm_info pgm; ++ struct kvm_s390_emerg_info emerg; ++ struct kvm_s390_extcall_info extcall; ++ struct kvm_s390_prefix_info prefix; ++ struct kvm_s390_stop_info stop; ++ struct kvm_s390_mchk_info mchk; ++ char reserved[64]; ++ } u; ++}; ++ ++struct kvm_s390_irq_state { ++ __u64 buf; ++ __u32 flags; /* will stay unused for compatibility reasons */ ++ __u32 len; ++ __u32 reserved[4]; /* will stay unused for compatibility reasons */ ++}; ++ ++struct kvm_s390_ucas_mapping { ++ __u64 user_addr; ++ __u64 vcpu_addr; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_sec_parm { ++ __u64 origin; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_unp { ++ __u64 addr; ++ __u64 size; ++ __u64 tweak; ++}; ++ ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ ++enum pv_cmd_id { ++ KVM_PV_ENABLE, ++ KVM_PV_DISABLE, ++ KVM_PV_SET_SEC_PARMS, ++ KVM_PV_UNPACK, ++ KVM_PV_VERIFY, ++ KVM_PV_PREP_RESET, ++ KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, ++ KVM_PV_ASYNC_CLEANUP_PREPARE, ++ KVM_PV_ASYNC_CLEANUP_PERFORM, ++}; ++ ++struct kvm_pv_cmd { ++ __u32 cmd; /* Command to be executed */ ++ __u16 rc; 
/* Ultravisor return code */ ++ __u16 rrc; /* Ultravisor return reason code */ ++ __u64 data; /* Data or address */ ++ __u32 flags; /* flags for future extensions. Must be 0 for now */ ++ __u32 reserved[3]; ++}; ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + + /* Device control API: s390-specific devices */ + #define KVM_DEV_FLIC_GET_ALL_IRQS 1 +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 003fb74534..31c95c2dfe 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -7,6 +7,8 @@ + * + */ + ++#include ++#include + #include + #include + #include +@@ -40,7 +42,6 @@ + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_MSI + #define __KVM_HAVE_USER_NMI +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_MSIX + #define __KVM_HAVE_MCE + #define __KVM_HAVE_PIT_STATE2 +@@ -49,7 +50,6 @@ + #define __KVM_HAVE_DEBUGREGS + #define __KVM_HAVE_XSAVE + #define __KVM_HAVE_XCRS +-#define __KVM_HAVE_READONLY_MEM + + /* Architectural interrupt line count. 
*/ + #define KVM_NR_INTERRUPTS 256 +@@ -455,8 +455,13 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +-/* attributes for system fd (group 0) */ +-#define KVM_X86_XCOMP_GUEST_SUPP 0 ++/* vendor-independent attributes for system fd (group 0) */ ++#define KVM_X86_GRP_SYSTEM 0 ++# define KVM_X86_XCOMP_GUEST_SUPP 0 ++ ++/* vendor-specific groups and attributes for system fd */ ++#define KVM_X86_GRP_SEV 1 ++# define KVM_X86_SEV_VMSA_FEATURES 0 + + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +@@ -524,9 +529,310 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + +-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) ++#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) + #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + ++/* for KVM_CAP_MCE */ ++struct kvm_x86_mce { ++ __u64 status; ++ __u64 addr; ++ __u64 misc; ++ __u64 mcg_status; ++ __u8 bank; ++ __u8 pad1[7]; ++ __u64 pad2[3]; ++}; ++ ++/* for KVM_CAP_XEN_HVM */ ++#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) ++#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) ++#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) ++ ++struct kvm_xen_hvm_config { ++ __u32 flags; ++ __u32 msr; ++ __u64 blob_addr_32; ++ __u64 blob_addr_64; ++ __u8 blob_size_32; ++ __u8 blob_size_64; ++ __u8 pad2[30]; ++}; ++ ++struct kvm_xen_hvm_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u8 long_mode; ++ __u8 vector; ++ __u8 runstate_update_flag; ++ union { ++ __u64 gfn; ++#define KVM_XEN_INVALID_GFN ((__u64)-1) ++ __u64 hva; ++ } shared_info; ++ struct { ++ __u32 send_port; ++ 
__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ ++ __u32 flags; ++#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) ++#define KVM_XEN_EVTCHN_UPDATE (1 << 1) ++#define KVM_XEN_EVTCHN_RESET (1 << 2) ++ /* ++ * Events sent by the guest are either looped back to ++ * the guest itself (potentially on a different port#) ++ * or signalled via an eventfd. ++ */ ++ union { ++ struct { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++ } port; ++ struct { ++ __u32 port; /* Zero for eventfd */ ++ __s32 fd; ++ } eventfd; ++ __u32 padding[4]; ++ } deliver; ++ } evtchn; ++ __u32 xen_version; ++ __u64 pad[8]; ++ } u; ++}; ++ ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 ++#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 ++#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ ++#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 ++ ++struct kvm_xen_vcpu_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u64 gpa; ++#define KVM_XEN_INVALID_GPA ((__u64)-1) ++ __u64 hva; ++ __u64 pad[8]; ++ struct { ++ __u64 state; ++ __u64 state_entry_time; ++ __u64 time_running; ++ __u64 time_runnable; ++ __u64 time_blocked; ++ __u64 time_offline; ++ } runstate; ++ __u32 vcpu_id; ++ struct { ++ __u32 port; ++ __u32 priority; ++ __u64 expires_ns; ++ } timer; ++ __u8 vector; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 ++#define 
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 ++#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 ++#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 ++ ++/* Secure Encrypted Virtualization command */ ++enum sev_cmd_id { ++ /* Guest initialization commands */ ++ KVM_SEV_INIT = 0, ++ KVM_SEV_ES_INIT, ++ /* Guest launch commands */ ++ KVM_SEV_LAUNCH_START, ++ KVM_SEV_LAUNCH_UPDATE_DATA, ++ KVM_SEV_LAUNCH_UPDATE_VMSA, ++ KVM_SEV_LAUNCH_SECRET, ++ KVM_SEV_LAUNCH_MEASURE, ++ KVM_SEV_LAUNCH_FINISH, ++ /* Guest migration commands (outgoing) */ ++ KVM_SEV_SEND_START, ++ KVM_SEV_SEND_UPDATE_DATA, ++ KVM_SEV_SEND_UPDATE_VMSA, ++ KVM_SEV_SEND_FINISH, ++ /* Guest migration commands (incoming) */ ++ KVM_SEV_RECEIVE_START, ++ KVM_SEV_RECEIVE_UPDATE_DATA, ++ KVM_SEV_RECEIVE_UPDATE_VMSA, ++ KVM_SEV_RECEIVE_FINISH, ++ /* Guest status and debug commands */ ++ KVM_SEV_GUEST_STATUS, ++ KVM_SEV_DBG_DECRYPT, ++ KVM_SEV_DBG_ENCRYPT, ++ /* Guest certificates commands */ ++ KVM_SEV_CERT_EXPORT, ++ /* Attestation report */ ++ KVM_SEV_GET_ATTESTATION_REPORT, ++ /* Guest Migration Extension */ ++ KVM_SEV_SEND_CANCEL, ++ ++ /* Second time is the charm; improved versions of the above ioctls. 
*/ ++ KVM_SEV_INIT2, ++ ++ KVM_SEV_NR_MAX, ++}; ++ ++struct kvm_sev_cmd { ++ __u32 id; ++ __u32 pad0; ++ __u64 data; ++ __u32 error; ++ __u32 sev_fd; ++}; ++ ++struct kvm_sev_init { ++ __u64 vmsa_features; ++ __u32 flags; ++ __u32 pad[9]; ++}; ++ ++struct kvm_sev_launch_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 dh_uaddr; ++ __u32 dh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_launch_update_data { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++ ++struct kvm_sev_launch_secret { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_launch_measure { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_guest_status { ++ __u32 handle; ++ __u32 policy; ++ __u32 state; ++}; ++ ++struct kvm_sev_dbg { ++ __u64 src_uaddr; ++ __u64 dst_uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_attestation_report { ++ __u8 mnonce[16]; ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_send_start { ++ __u32 policy; ++ __u32 pad0; ++ __u64 pdh_cert_uaddr; ++ __u32 pdh_cert_len; ++ __u32 pad1; ++ __u64 plat_certs_uaddr; ++ __u32 plat_certs_len; ++ __u32 pad2; ++ __u64 amd_certs_uaddr; ++ __u32 amd_certs_len; ++ __u32 pad3; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad4; ++}; ++ ++struct kvm_sev_send_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_receive_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 pdh_uaddr; ++ __u32 pdh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_receive_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; 
++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) ++#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) ++ ++struct kvm_hyperv_eventfd { ++ __u32 conn_id; ++ __s32 fd; ++ __u32 flags; ++ __u32 padding[3]; ++}; ++ ++#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff ++#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) ++ + /* + * Masked event layout. + * Bits Description +@@ -547,10 +853,10 @@ struct kvm_pmu_event_filter { + ((__u64)(!!(exclude)) << 55)) + + #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ +- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) ++ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) ++#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) + #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +@@ -558,9 +864,11 @@ struct kvm_pmu_event_filter { + #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + + /* x86-specific KVM_EXIT_HYPERCALL flags. */ +-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) ++#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + + #define KVM_X86_DEFAULT_VM 0 + #define KVM_X86_SW_PROTECTED_VM 1 ++#define KVM_X86_SEV_VM 2 ++#define KVM_X86_SEV_ES_VM 3 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h +new file mode 100644 +index 0000000000..d9897771be +--- /dev/null ++++ b/linux-headers/linux/bits.h +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* bits.h: Macros for dealing with bitmasks. 
*/ ++ ++#ifndef _LINUX_BITS_H ++#define _LINUX_BITS_H ++ ++#define __GENMASK(h, l) \ ++ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ ++ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) ++ ++#define __GENMASK_ULL(h, l) \ ++ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ ++ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) ++ ++#endif /* _LINUX_BITS_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 17839229b2..038731cdef 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -16,6 +16,11 @@ + + #define KVM_API_VERSION 12 + ++/* ++ * Backwards-compatible definitions. ++ */ ++#define __KVM_HAVE_GUEST_DEBUG ++ + /* for KVM_SET_USER_MEMORY_REGION */ + struct kvm_userspace_memory_region { + __u32 slot; +@@ -85,43 +90,6 @@ struct kvm_pit_config { + + #define KVM_PIT_SPEAKER_DUMMY 1 + +-struct kvm_s390_skeys { +- __u64 start_gfn; +- __u64 count; +- __u64 skeydata_addr; +- __u32 flags; +- __u32 reserved[9]; +-}; +- +-#define KVM_S390_CMMA_PEEK (1 << 0) +- +-/** +- * kvm_s390_cmma_log - Used for CMMA migration. +- * +- * Used both for input and output. +- * +- * @start_gfn: Guest page number to start from. +- * @count: Size of the result buffer. +- * @flags: Control operation mode via KVM_S390_CMMA_* flags +- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty +- * pages are still remaining. +- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set +- * in the PGSTE. +- * @values: Pointer to the values buffer. +- * +- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
+- */ +-struct kvm_s390_cmma_log { +- __u64 start_gfn; +- __u32 count; +- __u32 flags; +- union { +- __u64 remaining; +- __u64 mask; +- }; +- __u64 values; +-}; +- + struct kvm_hyperv_exit { + #define KVM_EXIT_HYPERV_SYNIC 1 + #define KVM_EXIT_HYPERV_HCALL 2 +@@ -313,11 +281,6 @@ struct kvm_run { + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +-#define KVM_S390_RESET_POR 1 +-#define KVM_S390_RESET_CLEAR 2 +-#define KVM_S390_RESET_SUBSYSTEM 4 +-#define KVM_S390_RESET_CPU_INIT 8 +-#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { +@@ -532,43 +495,6 @@ struct kvm_translation { + __u8 pad[5]; + }; + +-/* for KVM_S390_MEM_OP */ +-struct kvm_s390_mem_op { +- /* in */ +- __u64 gaddr; /* the guest address */ +- __u64 flags; /* flags */ +- __u32 size; /* amount of bytes */ +- __u32 op; /* type of operation */ +- __u64 buf; /* buffer in userspace */ +- union { +- struct { +- __u8 ar; /* the access register number */ +- __u8 key; /* access key, ignored if flag unset */ +- __u8 pad1[6]; /* ignored */ +- __u64 old_addr; /* ignored if cmpxchg flag unset */ +- }; +- __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* ignored */ +- }; +-}; +-/* types for kvm_s390_mem_op->op */ +-#define KVM_S390_MEMOP_LOGICAL_READ 0 +-#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +-#define KVM_S390_MEMOP_SIDA_READ 2 +-#define KVM_S390_MEMOP_SIDA_WRITE 3 +-#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 +- +-/* flags for kvm_s390_mem_op->flags */ +-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +- +-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) +- + /* for KVM_INTERRUPT */ + struct kvm_interrupt { + /* in */ 
+@@ -633,124 +559,6 @@ struct kvm_mp_state { + __u32 mp_state; + }; + +-struct kvm_s390_psw { +- __u64 mask; +- __u64 addr; +-}; +- +-/* valid values for type in kvm_s390_interrupt */ +-#define KVM_S390_SIGP_STOP 0xfffe0000u +-#define KVM_S390_PROGRAM_INT 0xfffe0001u +-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +-#define KVM_S390_RESTART 0xfffe0003u +-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +-#define KVM_S390_MCHK 0xfffe1000u +-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +-#define KVM_S390_INT_CPU_TIMER 0xffff1005u +-#define KVM_S390_INT_VIRTIO 0xffff2603u +-#define KVM_S390_INT_SERVICE 0xffff2401u +-#define KVM_S390_INT_EMERGENCY 0xffff1201u +-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +-/* Anything below 0xfffe0000u is taken by INT_IO */ +-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ +- (((schid)) | \ +- ((ssid) << 16) | \ +- ((cssid) << 18) | \ +- ((ai) << 26)) +-#define KVM_S390_INT_IO_MIN 0x00000000u +-#define KVM_S390_INT_IO_MAX 0xfffdffffu +-#define KVM_S390_INT_IO_AI_MASK 0x04000000u +- +- +-struct kvm_s390_interrupt { +- __u32 type; +- __u32 parm; +- __u64 parm64; +-}; +- +-struct kvm_s390_io_info { +- __u16 subchannel_id; +- __u16 subchannel_nr; +- __u32 io_int_parm; +- __u32 io_int_word; +-}; +- +-struct kvm_s390_ext_info { +- __u32 ext_params; +- __u32 pad; +- __u64 ext_params2; +-}; +- +-struct kvm_s390_pgm_info { +- __u64 trans_exc_code; +- __u64 mon_code; +- __u64 per_address; +- __u32 data_exc_code; +- __u16 code; +- __u16 mon_class_nr; +- __u8 per_code; +- __u8 per_atmid; +- __u8 exc_access_id; +- __u8 per_access_id; +- __u8 op_access_id; +-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +-#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +-#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 +- __u8 flags; +- __u8 pad[2]; +-}; +- +-struct kvm_s390_prefix_info { +- __u32 address; +-}; +- +-struct kvm_s390_extcall_info { +- __u16 code; 
+-}; +- +-struct kvm_s390_emerg_info { +- __u16 code; +-}; +- +-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +-struct kvm_s390_stop_info { +- __u32 flags; +-}; +- +-struct kvm_s390_mchk_info { +- __u64 cr14; +- __u64 mcic; +- __u64 failing_storage_address; +- __u32 ext_damage_code; +- __u32 pad; +- __u8 fixed_logout[16]; +-}; +- +-struct kvm_s390_irq { +- __u64 type; +- union { +- struct kvm_s390_io_info io; +- struct kvm_s390_ext_info ext; +- struct kvm_s390_pgm_info pgm; +- struct kvm_s390_emerg_info emerg; +- struct kvm_s390_extcall_info extcall; +- struct kvm_s390_prefix_info prefix; +- struct kvm_s390_stop_info stop; +- struct kvm_s390_mchk_info mchk; +- char reserved[64]; +- } u; +-}; +- +-struct kvm_s390_irq_state { +- __u64 buf; +- __u32 flags; /* will stay unused for compatibility reasons */ +- __u32 len; +- __u32 reserved[4]; /* will stay unused for compatibility reasons */ +-}; +- + /* for KVM_SET_GUEST_DEBUG */ + + #define KVM_GUESTDBG_ENABLE 0x00000001 +@@ -806,50 +614,6 @@ struct kvm_enable_cap { + __u8 pad[64]; + }; + +-/* for KVM_PPC_GET_PVINFO */ +- +-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +- +-struct kvm_ppc_pvinfo { +- /* out */ +- __u32 flags; +- __u32 hcall[4]; +- __u8 pad[108]; +-}; +- +-/* for KVM_PPC_GET_SMMU_INFO */ +-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 +- +-struct kvm_ppc_one_page_size { +- __u32 page_shift; /* Page shift (or 0) */ +- __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +-}; +- +-struct kvm_ppc_one_seg_page_size { +- __u32 page_shift; /* Base page shift of segment (or 0) */ +- __u32 slb_enc; /* SLB encoding for BookS */ +- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +-#define KVM_PPC_1T_SEGMENTS 0x00000002 +-#define KVM_PPC_NO_HASH 0x00000004 +- +-struct kvm_ppc_smmu_info { +- __u64 flags; +- __u32 slb_size; +- __u16 data_keys; /* # storage keys supported for data */ +- __u16 instr_keys; /* # storage keys supported for instructions */ +- struct 
kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +-struct kvm_ppc_resize_hpt { +- __u64 flags; +- __u32 shift; +- __u32 pad; +-}; +- + #define KVMIO 0xAE + + /* machine type bits, to be used as argument to KVM_CREATE_VM */ +@@ -919,9 +683,7 @@ struct kvm_ppc_resize_hpt { + /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ + #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 + #define KVM_CAP_USER_NMI 22 +-#ifdef __KVM_HAVE_GUEST_DEBUG + #define KVM_CAP_SET_GUEST_DEBUG 23 +-#endif + #ifdef __KVM_HAVE_PIT + #define KVM_CAP_REINJECT_CONTROL 24 + #endif +@@ -1152,8 +914,6 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_GUEST_MEMFD 234 + #define KVM_CAP_VM_TYPES 235 + +-#ifdef KVM_CAP_IRQ_ROUTING +- + struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +@@ -1218,42 +978,6 @@ struct kvm_irq_routing { + struct kvm_irq_routing_entry entries[]; + }; + +-#endif +- +-#ifdef KVM_CAP_MCE +-/* x86 MCE */ +-struct kvm_x86_mce { +- __u64 status; +- __u64 addr; +- __u64 misc; +- __u64 mcg_status; +- __u8 bank; +- __u8 pad1[7]; +- __u64 pad2[3]; +-}; +-#endif +- +-#ifdef KVM_CAP_XEN_HVM +-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +- +-struct kvm_xen_hvm_config { +- __u32 flags; +- __u32 msr; +- __u64 blob_addr_32; +- __u64 blob_addr_64; +- __u8 blob_size_32; +- __u8 blob_size_64; +- __u8 pad2[30]; +-}; +-#endif +- + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + /* + * Available with KVM_CAP_IRQFD_RESAMPLE +@@ -1438,11 +1162,6 @@ struct kvm_vfio_spapr_tce { + struct kvm_userspace_memory_region2) + + /* enable ucontrol for s390 */ +-struct 
kvm_s390_ucas_mapping { +- __u64 user_addr; +- __u64 vcpu_addr; +- __u64 length; +-}; + #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) + #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) + #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +@@ -1637,89 +1356,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-struct kvm_s390_pv_sec_parm { +- __u64 origin; +- __u64 length; +-}; +- +-struct kvm_s390_pv_unp { +- __u64 addr; +- __u64 size; +- __u64 tweak; +-}; +- +-enum pv_cmd_dmp_id { +- KVM_PV_DUMP_INIT, +- KVM_PV_DUMP_CONFIG_STOR_STATE, +- KVM_PV_DUMP_COMPLETE, +- KVM_PV_DUMP_CPU, +-}; +- +-struct kvm_s390_pv_dmp { +- __u64 subcmd; +- __u64 buff_addr; +- __u64 buff_len; +- __u64 gaddr; /* For dump storage state */ +- __u64 reserved[4]; +-}; +- +-enum pv_cmd_info_id { +- KVM_PV_INFO_VM, +- KVM_PV_INFO_DUMP, +-}; +- +-struct kvm_s390_pv_info_dump { +- __u64 dump_cpu_buffer_len; +- __u64 dump_config_mem_buffer_per_1m; +- __u64 dump_config_finalize_len; +-}; +- +-struct kvm_s390_pv_info_vm { +- __u64 inst_calls_list[4]; +- __u64 max_cpus; +- __u64 max_guests; +- __u64 max_guest_addr; +- __u64 feature_indication; +-}; +- +-struct kvm_s390_pv_info_header { +- __u32 id; +- __u32 len_max; +- __u32 len_written; +- __u32 reserved; +-}; +- +-struct kvm_s390_pv_info { +- struct kvm_s390_pv_info_header header; +- union { +- struct kvm_s390_pv_info_dump dump; +- struct kvm_s390_pv_info_vm vm; +- }; +-}; +- +-enum pv_cmd_id { +- KVM_PV_ENABLE, +- KVM_PV_DISABLE, +- KVM_PV_SET_SEC_PARMS, +- KVM_PV_UNPACK, +- KVM_PV_VERIFY, +- KVM_PV_PREP_RESET, +- KVM_PV_UNSHARE_ALL, +- KVM_PV_INFO, +- KVM_PV_DUMP, +- KVM_PV_ASYNC_CLEANUP_PREPARE, +- KVM_PV_ASYNC_CLEANUP_PERFORM, +-}; +- +-struct kvm_pv_cmd { +- __u32 cmd; /* Command to be executed */ +- __u16 rc; /* Ultravisor return code */ +- __u16 rrc; /* Ultravisor return reason code */ +- __u64 data; /* 
Data or address */ +- __u32 flags; /* flags for future extensions. Must be 0 for now */ +- __u32 reserved[3]; +-}; +- + /* Available with KVM_CAP_S390_PROTECTED */ + #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + +@@ -1733,58 +1369,6 @@ struct kvm_pv_cmd { + #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) + #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +-struct kvm_xen_hvm_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u8 long_mode; +- __u8 vector; +- __u8 runstate_update_flag; +- struct { +- __u64 gfn; +-#define KVM_XEN_INVALID_GFN ((__u64)-1) +- } shared_info; +- struct { +- __u32 send_port; +- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ +- __u32 flags; +-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +-#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +-#define KVM_XEN_EVTCHN_RESET (1 << 2) +- /* +- * Events sent by the guest are either looped back to +- * the guest itself (potentially on a different port#) +- * or signalled via an eventfd. 
+- */ +- union { +- struct { +- __u32 port; +- __u32 vcpu; +- __u32 priority; +- } port; +- struct { +- __u32 port; /* Zero for eventfd */ +- __s32 fd; +- } eventfd; +- __u32 padding[4]; +- } deliver; +- } evtchn; +- __u32 xen_version; +- __u64 pad[8]; +- } u; +-}; +- +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +- + /* Per-vCPU Xen attributes */ + #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) + #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +@@ -1795,242 +1379,6 @@ struct kvm_xen_hvm_attr { + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) + #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +-struct kvm_xen_vcpu_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u64 gpa; +-#define KVM_XEN_INVALID_GPA ((__u64)-1) +- __u64 pad[8]; +- struct { +- __u64 state; +- __u64 state_entry_time; +- __u64 time_running; +- __u64 time_runnable; +- __u64 time_blocked; +- __u64 time_offline; +- } runstate; +- __u32 vcpu_id; +- struct { +- __u32 port; +- __u32 priority; +- __u64 expires_ns; +- } timer; +- __u8 vector; +- } u; +-}; +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +-/* Available with KVM_CAP_XEN_HVM / 
KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +- +-/* Secure Encrypted Virtualization command */ +-enum sev_cmd_id { +- /* Guest initialization commands */ +- KVM_SEV_INIT = 0, +- KVM_SEV_ES_INIT, +- /* Guest launch commands */ +- KVM_SEV_LAUNCH_START, +- KVM_SEV_LAUNCH_UPDATE_DATA, +- KVM_SEV_LAUNCH_UPDATE_VMSA, +- KVM_SEV_LAUNCH_SECRET, +- KVM_SEV_LAUNCH_MEASURE, +- KVM_SEV_LAUNCH_FINISH, +- /* Guest migration commands (outgoing) */ +- KVM_SEV_SEND_START, +- KVM_SEV_SEND_UPDATE_DATA, +- KVM_SEV_SEND_UPDATE_VMSA, +- KVM_SEV_SEND_FINISH, +- /* Guest migration commands (incoming) */ +- KVM_SEV_RECEIVE_START, +- KVM_SEV_RECEIVE_UPDATE_DATA, +- KVM_SEV_RECEIVE_UPDATE_VMSA, +- KVM_SEV_RECEIVE_FINISH, +- /* Guest status and debug commands */ +- KVM_SEV_GUEST_STATUS, +- KVM_SEV_DBG_DECRYPT, +- KVM_SEV_DBG_ENCRYPT, +- /* Guest certificates commands */ +- KVM_SEV_CERT_EXPORT, +- /* Attestation report */ +- KVM_SEV_GET_ATTESTATION_REPORT, +- /* Guest Migration Extension */ +- KVM_SEV_SEND_CANCEL, +- +- KVM_SEV_NR_MAX, +-}; +- +-struct kvm_sev_cmd { +- __u32 id; +- __u64 data; +- __u32 error; +- __u32 sev_fd; +-}; +- +-struct kvm_sev_launch_start { +- __u32 handle; +- __u32 policy; +- __u64 dh_uaddr; +- __u32 dh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_launch_update_data { +- __u64 uaddr; +- __u32 len; +-}; +- +- +-struct kvm_sev_launch_secret { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_launch_measure { +- __u64 uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_guest_status { +- __u32 handle; +- __u32 policy; +- __u32 state; +-}; +- +-struct kvm_sev_dbg { +- __u64 src_uaddr; +- __u64 dst_uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_attestation_report { +- __u8 mnonce[16]; +- __u64 uaddr; +- __u32 len; +-}; +- 
+-struct kvm_sev_send_start { +- __u32 policy; +- __u64 pdh_cert_uaddr; +- __u32 pdh_cert_len; +- __u64 plat_certs_uaddr; +- __u32 plat_certs_len; +- __u64 amd_certs_uaddr; +- __u32 amd_certs_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_send_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_receive_start { +- __u32 handle; +- __u32 policy; +- __u64 pdh_uaddr; +- __u32 pdh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_receive_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +- +-struct kvm_assigned_pci_dev { +- __u32 assigned_dev_id; +- __u32 busnr; +- __u32 devfn; +- __u32 flags; +- __u32 segnr; +- union { +- __u32 reserved[11]; +- }; +-}; +- +-#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +-#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) +- +-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) +- +-#define KVM_DEV_IRQ_HOST_MASK 0x00ff +-#define KVM_DEV_IRQ_GUEST_MASK 0xff00 +- +-struct kvm_assigned_irq { +- __u32 assigned_dev_id; +- __u32 host_irq; /* ignored (legacy field) */ +- __u32 guest_irq; +- __u32 flags; +- union { +- __u32 reserved[12]; +- }; +-}; +- +-struct kvm_assigned_msix_nr { +- __u32 assigned_dev_id; +- __u16 entry_nr; +- __u16 padding; +-}; +- +-#define KVM_MAX_MSIX_PER_DEV 256 +-struct kvm_assigned_msix_entry { +- __u32 assigned_dev_id; +- __u32 gsi; +- __u16 entry; /* The index of entry in the MSI-X table */ +- __u16 padding[3]; +-}; +- +-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +- +-/* 
Available with KVM_CAP_ARM_USER_IRQ */ +- +-/* Bits for run->s.regs.device_irq_level */ +-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +-#define KVM_ARM_DEV_PMU (1 << 2) +- +-struct kvm_hyperv_eventfd { +- __u32 conn_id; +- __s32 fd; +- __u32 flags; +- __u32 padding[3]; +-}; +- +-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +- + #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) + #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + +@@ -2176,33 +1524,6 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_S390_ZPCI_OP */ + #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +-struct kvm_s390_zpci_op { +- /* in */ +- __u32 fh; /* target device */ +- __u8 op; /* operation to perform */ +- __u8 pad[3]; +- union { +- /* for KVM_S390_ZPCIOP_REG_AEN */ +- struct { +- __u64 ibv; /* Guest addr of interrupt bit vector */ +- __u64 sb; /* Guest addr of summary bit */ +- __u32 flags; +- __u32 noi; /* Number of interrupts */ +- __u8 isc; /* Guest interrupt subclass */ +- __u8 sbo; /* Offset of guest summary bit vector */ +- __u16 pad; +- } reg_aen; +- __u64 reserved[8]; +- } u; +-}; +- +-/* types for kvm_s390_zpci_op->op */ +-#define KVM_S390_ZPCIOP_REG_AEN 0 +-#define KVM_S390_ZPCIOP_DEREG_AEN 1 +- +-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +- + /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ + #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index bcb21339ee..c3046c6bff 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -28,6 +28,9 @@ enum { + SEV_PEK_CERT_IMPORT, + SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ + SEV_GET_ID2, ++ SNP_PLATFORM_STATUS, ++ SNP_COMMIT, ++ SNP_SET_CONFIG, + + SEV_MAX, + }; +@@ -69,6 +72,12 @@ typedef enum { + SEV_RET_RESOURCE_LIMIT, + 
SEV_RET_SECURE_DATA_INVALID, + SEV_RET_INVALID_KEY = 0x27, ++ SEV_RET_INVALID_PAGE_SIZE, ++ SEV_RET_INVALID_PAGE_STATE, ++ SEV_RET_INVALID_MDATA_ENTRY, ++ SEV_RET_INVALID_PAGE_OWNER, ++ SEV_RET_INVALID_PAGE_AEAD_OFLOW, ++ SEV_RET_RMP_INIT_REQUIRED, + SEV_RET_MAX, + } sev_ret_code; + +@@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { + __u32 length; /* In/Out */ + } __attribute__((packed)); + ++/** ++ * struct sev_user_data_snp_status - SNP status ++ * ++ * @api_major: API major version ++ * @api_minor: API minor version ++ * @state: current platform state ++ * @is_rmp_initialized: whether RMP is initialized or not ++ * @rsvd: reserved ++ * @build_id: firmware build id for the API version ++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded ++ * @rsvd1: reserved ++ * @guest_count: the number of guest currently managed by the firmware ++ * @current_tcb_version: current TCB version ++ * @reported_tcb_version: reported TCB version ++ */ ++struct sev_user_data_snp_status { ++ __u8 api_major; /* Out */ ++ __u8 api_minor; /* Out */ ++ __u8 state; /* Out */ ++ __u8 is_rmp_initialized:1; /* Out */ ++ __u8 rsvd:7; ++ __u32 build_id; /* Out */ ++ __u32 mask_chip_id:1; /* Out */ ++ __u32 mask_chip_key:1; /* Out */ ++ __u32 vlek_en:1; /* Out */ ++ __u32 rsvd1:29; ++ __u32 guest_count; /* Out */ ++ __u64 current_tcb_version; /* Out */ ++ __u64 reported_tcb_version; /* Out */ ++} __attribute__((packed)); ++ ++/** ++ * struct sev_user_data_snp_config - system wide configuration value for SNP. ++ * ++ * @reported_tcb: the TCB version to report in the guest attestation report. 
++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @rsvd: reserved ++ * @rsvd1: reserved ++ */ ++struct sev_user_data_snp_config { ++ __u64 reported_tcb ; /* In */ ++ __u32 mask_chip_id:1; /* In */ ++ __u32 mask_chip_key:1; /* In */ ++ __u32 rsvd:30; /* In */ ++ __u8 rsvd1[52]; ++} __attribute__((packed)); ++ + /** + * struct sev_issue_cmd - SEV ioctl parameters + * +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..bea6973906 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -227,4 +227,11 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* Get the queue size of a specific virtqueue. ++ * userspace set the vring index in vhost_vring_state.index ++ * kernel set the queue size in vhost_vring_state.num ++ */ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++ struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch new file mode 100644 index 0000000..6524d6a --- /dev/null +++ b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch @@ -0,0 +1,71 @@ +From 9f485c8df885bcd1ff6c5692463c6168bfec07fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 13:29:53 +0200 +Subject: [PATCH 054/100] machine: allow early use of + machine_require_guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [54/91] fd8c1a6624d5f27268215c8aa70dfc9d37bdb981 (bonzini/rhel-qemu-kvm) + +Ask the ConfidentialGuestSupport object whether to use guest_memfd +for KVM-backend private memory. 
This bool can be set in instance_init +(or user_complete) so that it is available when the machine is created. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765) +Signed-off-by: Paolo Bonzini +--- + hw/core/machine.c | 2 +- + include/exec/confidential-guest-support.h | 5 +++++ + include/hw/boards.h | 1 - + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 07b994e136..2055e0d312 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1482,7 +1482,7 @@ bool machine_mem_merge(MachineState *machine) + + bool machine_require_guest_memfd(MachineState *machine) + { +- return machine->require_guest_memfd; ++ return machine->cgs && machine->cgs->require_guest_memfd; + } + + static char *cpu_slot_to_string(const CPUArchId *cpu) +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index e5b188cffb..02dc4e518f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + struct ConfidentialGuestSupport { + Object parent; + ++ /* ++ * True if the machine should use guest_memfd for RAM. 
++ */ ++ bool require_guest_memfd; ++ + /* + * ready: flag set by CGS initialization code once it's ready to + * start executing instructions in a potentially-secure +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 815a1c4b26..0d1f9533ef 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -373,7 +373,6 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; +- bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch new file mode 100644 index 0000000..538e82d --- /dev/null +++ b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch @@ -0,0 +1,85 @@ +From 331c58d87dde8b4757e1d1e09d9b16bac2952d22 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 30 May 2024 06:16:15 -0500 +Subject: [PATCH 081/100] memory: Introduce + memory_region_init_ram_guest_memfd() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [81/91] d5b0898d791f3f90d1acda0230f96ca9bf5be5e4 (bonzini/rhel-qemu-kvm) + +Introduce memory_region_init_ram_guest_memfd() to allocate private +guset memfd on the MemoryRegion initialization. It's for the use case of +TDVF, which must be private on TDX case. 
+ +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a0aa6db7ce72a08703774107185e639e73e7754c) +Signed-off-by: Paolo Bonzini +--- + include/exec/memory.h | 6 ++++++ + system/memory.c | 24 ++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 679a847685..1e351f6fc8 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1603,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr, + uint64_t size, + Error **errp); + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp); ++ + /** + * memory_region_init_rom: Initialize a ROM memory region. + * +diff --git a/system/memory.c b/system/memory.c +index c756950c0c..b09065eef3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3606,6 +3606,30 @@ bool memory_region_init_ram(MemoryRegion *mr, + return true; + } + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp) ++{ ++ DeviceState *owner_dev; ++ ++ if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, ++ RAM_GUEST_MEMFD, errp)) { ++ return false; ++ } ++ /* This will assert if owner is neither NULL nor a DeviceState. ++ * We only want the owner here for the purposes of defining a ++ * unique name for migration. TODO: Ideally we should implement ++ * a naming scheme for Objects which are not DeviceStates, in ++ * which case we can relax this restriction. 
++ */ ++ owner_dev = DEVICE(owner); ++ vmstate_register_ram(mr, owner_dev); ++ ++ return true; ++} ++ + bool memory_region_init_rom(MemoryRegion *mr, + Object *owner, + const char *name, +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch deleted file mode 100644 index 5b531f5..0000000 --- a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Wed, 17 Jan 2024 14:55:54 +0100 -Subject: [PATCH 02/22] memory-device: reintroduce memory region size check - -RH-Author: David Hildenbrand -RH-MergeRequest: 221: memory-device: reintroduce memory region size check -RH-Jira: RHEL-20341 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Igor Mammedov -RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71 - -JIRA: https://issues.redhat.com/browse/RHEL-20341 - -We used to check that the memory region size is multiples of the overall -requested address alignment for the device memory address. - -We removed that check, because there are cases (i.e., hv-balloon) where -devices unconditionally request an address alignment that has a very large -alignment (i.e., 32 GiB), but the actual memory device size might not be -multiples of that alignment. - -However, this change: - -(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte". -(b) allows for DIMMs that partially cover hugetlb pages, previously - reported in [1]. - -Both scenarios don't make any sense: we might even waste memory. - -So let's reintroduce that check, but only check that the -memory region size is multiples of the memory region alignment (i.e., -page size, huge page size), but not any additional memory device -requirements communicated using md->get_min_alignment(). 
- -The following examples now fail again as expected: - -(a) 1M with 2M THP - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-ram,id=mem1,size=1M \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x200000 - -(b) 1G+1byte - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-ram,id=mem1,size=1073741825B \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x200000 - -(c) Unliagned hugetlb size (2M) - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \ - -device pc-dimm,id=dimm1,memdev=mem1 - backend memory size must be multiple of 0x200000 - -(d) Unliagned hugetlb size (1G) - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x40000000 - -Note that this fix depends on a hv-balloon change to communicate its -additional alignment requirements using get_min_alignment() instead of -through the memory region. - -[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com - -Message-ID: <20240117135554.787344-3-david@redhat.com> -Reported-by: Zhenyu Zhang -Reported-by: Michal Privoznik -Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check") -Tested-by: Zhenyu Zhang -Tested-by: Mario Casquero -Reviewed-by: Maciej S. 
Szmigiero -Signed-off-by: David Hildenbrand -(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4) -Signed-off-by: David Hildenbrand ---- - hw/mem/memory-device.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c -index a1b1af26bc..e098585cda 100644 ---- a/hw/mem/memory-device.c -+++ b/hw/mem/memory-device.c -@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, - goto out; - } - -+ /* -+ * We always want the memory region size to be multiples of the memory -+ * region alignment: for example, DIMMs with 1G+1byte size don't make -+ * any sense. Note that we don't check that the size is multiples -+ * of any additional alignment requirements the memory device might -+ * have when it comes to the address in physical address space. -+ */ -+ if (!QEMU_IS_ALIGNED(memory_region_size(mr), -+ memory_region_get_alignment(mr))) { -+ error_setg(errp, "backend memory size must be multiple of 0x%" -+ PRIx64, memory_region_get_alignment(mr)); -+ return; -+ } -+ - if (legacy_align) { - align = *legacy_align; - } else { --- -2.39.3 - diff --git a/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch b/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch deleted file mode 100644 index 871a80e..0000000 --- a/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 21cadc8ed200ad9af13b217b441b9f12cd2163ee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 13 Mar 2024 16:30:00 +0100 -Subject: [PATCH 1/4] mirror: Don't call job_pause_point() under graph lock - -RH-Author: Kevin Wolf -RH-MergeRequest: 231: Fix deadlock and crash during storage migration -RH-Jira: RHEL-28125 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] 093910fcd5ea1516d0ed48a9cd73dad6170590f3 (kmwolf/centos-qemu-kvm) - -Calling job_pause_point() while holding the 
graph reader lock -potentially results in a deadlock: bdrv_graph_wrlock() first drains -everything, including the mirror job, which pauses it. The job is only -unpaused at the end of the drain section, which is when the graph writer -lock has been successfully taken. However, if the job happens to be -paused at a pause point where it still holds the reader lock, the writer -lock can't be taken as long as the job is still paused. - -Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly. - -Cc: qemu-stable@nongnu.org -Buglink: https://issues.redhat.com/browse/RHEL-28125 -Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock") -Signed-off-by: Kevin Wolf -Message-ID: <20240313153000.33121-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit ae5a40e8581185654a667fbbf7e4adbc2a2a3e45) -Signed-off-by: Kevin Wolf ---- - block/mirror.c | 10 ++++++---- - include/qemu/job.h | 2 +- - 2 files changed, 7 insertions(+), 5 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 5145eb53e1..1bdce3b657 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - return bytes_handled; - } - --static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) -+static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) - { -- BlockDriverState *source = s->mirror_top_bs->backing->bs; -+ BlockDriverState *source; - MirrorOp *pseudo_op; - int64_t offset; - /* At least the first dirty chunk is mirrored in one iteration. 
*/ -@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) - bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); - int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); - -+ bdrv_graph_co_rdlock(); -+ source = s->mirror_top_bs->backing->bs; -+ bdrv_graph_co_rdunlock(); -+ - bdrv_dirty_bitmap_lock(s->dirty_bitmap); - offset = bdrv_dirty_iter_next(s->dbi); - if (offset < 0) { -@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - mirror_wait_for_free_in_flight_slot(s); - continue; - } else if (cnt != 0) { -- bdrv_graph_co_rdlock(); - mirror_iteration(s); -- bdrv_graph_co_rdunlock(); - } - } - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 9ea98b5927..2b873f2576 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -483,7 +483,7 @@ void job_enter(Job *job); - * - * Called with job_mutex *not* held. - */ --void coroutine_fn job_pause_point(Job *job); -+void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job); - - /** - * @job: The job that calls the function. 
--- -2.39.3 - diff --git a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch deleted file mode 100644 index 345a2b4..0000000 --- a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch +++ /dev/null @@ -1,1630 +0,0 @@ -From 972e553e605e8916fc47c2d51cdbde940fd7d855 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:23 -0500 -Subject: [PATCH 13/22] monitor: only run coroutine commands in - qemu_aio_context - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [9/17] ec5690fcade04a88bd1815bf2ae0377e80fe3d51 (stefanha/centos-stream-qemu-kvm) - -monitor_qmp_dispatcher_co() runs in the iohandler AioContext that is not -polled during nested event loops. The coroutine currently reschedules -itself in the main loop's qemu_aio_context AioContext, which is polled -during nested event loops. One known problem is that QMP device-add -calls drain_call_rcu(), which temporarily drops the BQL, leading to all -sorts of havoc like other vCPU threads re-entering device emulation code -while another vCPU thread is waiting in device emulation code with -aio_poll(). - -Paolo Bonzini suggested running non-coroutine QMP handlers in the -iohandler AioContext. This avoids trouble with nested event loops. His -original idea was to move coroutine rescheduling to -monitor_qmp_dispatch(), but I resorted to moving it to qmp_dispatch() -because we don't know if the QMP handler needs to run in coroutine -context in monitor_qmp_dispatch(). monitor_qmp_dispatch() would have -been nicer since it's associated with the monitor implementation and not -as general as qmp_dispatch(), which is also used by qemu-ga. 
- -A number of qemu-iotests need updated .out files because the order of -QMP events vs QMP responses has changed. - -Solves Issue #1933. - -Cc: qemu-stable@nongnu.org -Fixes: 7bed89958bfbf40df9ca681cefbdca63abdde39d ("device_core: use drain_call_rcu in in qmp_device_add") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192 -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985 -Buglink: https://issues.redhat.com/browse/RHEL-17369 -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Tested-by: Fiona Ebner -Signed-off-by: Kevin Wolf -(cherry picked from commit effd60c878176bcaf97fa7ce2b12d04bb8ead6f7) -Signed-off-by: Stefan Hajnoczi ---- - monitor/qmp.c | 17 ---- - qapi/qmp-dispatch.c | 24 +++++- - tests/qemu-iotests/060.out | 4 +- - tests/qemu-iotests/071.out | 4 +- - tests/qemu-iotests/081.out | 16 ++-- - tests/qemu-iotests/087.out | 12 +-- - tests/qemu-iotests/108.out | 2 +- - tests/qemu-iotests/109 | 4 +- - tests/qemu-iotests/109.out | 78 ++++++++----------- - tests/qemu-iotests/117.out | 2 +- - tests/qemu-iotests/120.out | 2 +- - tests/qemu-iotests/127.out | 2 +- - tests/qemu-iotests/140.out | 2 +- - tests/qemu-iotests/143.out | 2 +- - tests/qemu-iotests/156.out | 2 +- - tests/qemu-iotests/176.out | 16 ++-- - tests/qemu-iotests/182.out | 2 +- - tests/qemu-iotests/183.out | 4 +- - tests/qemu-iotests/184.out | 32 ++++---- - tests/qemu-iotests/185 | 6 +- - tests/qemu-iotests/185.out | 45 +++++++++-- - tests/qemu-iotests/191.out | 16 ++-- - tests/qemu-iotests/195.out | 16 ++-- - tests/qemu-iotests/223.out | 12 +-- - tests/qemu-iotests/227.out | 32 ++++---- - tests/qemu-iotests/247.out | 2 +- - tests/qemu-iotests/273.out | 8 +- - tests/qemu-iotests/308 | 4 +- - tests/qemu-iotests/308.out | 4 +- - tests/qemu-iotests/tests/file-io-error | 5 +- - tests/qemu-iotests/tests/iothreads-resize.out | 2 +- - tests/qemu-iotests/tests/qsd-jobs.out | 4 +- - 32 files changed, 205 insertions(+), 178 
deletions(-) - -diff --git a/monitor/qmp.c b/monitor/qmp.c -index 6eee450fe4..a239945e8d 100644 ---- a/monitor/qmp.c -+++ b/monitor/qmp.c -@@ -321,14 +321,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) - qemu_coroutine_yield(); - } - -- /* -- * Move the coroutine from iohandler_ctx to qemu_aio_context for -- * executing the command handler so that it can make progress if it -- * involves an AIO_WAIT_WHILE(). -- */ -- aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co); -- qemu_coroutine_yield(); -- - /* Process request */ - if (req_obj->req) { - if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) { -@@ -355,15 +347,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) - } - - qmp_request_free(req_obj); -- -- /* -- * Yield and reschedule so the main loop stays responsive. -- * -- * Move back to iohandler_ctx so that nested event loops for -- * qemu_aio_context don't start new monitor commands. -- */ -- aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co); -- qemu_coroutine_yield(); - } - qatomic_set(&qmp_dispatcher_co, NULL); - } -diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c -index 555528b6bb..176b549473 100644 ---- a/qapi/qmp-dispatch.c -+++ b/qapi/qmp-dispatch.c -@@ -206,9 +206,31 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ - assert(!(oob && qemu_in_coroutine())); - assert(monitor_cur() == NULL); - if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { -+ if (qemu_in_coroutine()) { -+ /* -+ * Move the coroutine from iohandler_ctx to qemu_aio_context for -+ * executing the command handler so that it can make progress if it -+ * involves an AIO_WAIT_WHILE(). 
-+ */ -+ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); -+ qemu_coroutine_yield(); -+ } -+ - monitor_set_cur(qemu_coroutine_self(), cur_mon); - cmd->fn(args, &ret, &err); - monitor_set_cur(qemu_coroutine_self(), NULL); -+ -+ if (qemu_in_coroutine()) { -+ /* -+ * Yield and reschedule so the main loop stays responsive. -+ * -+ * Move back to iohandler_ctx so that nested event loops for -+ * qemu_aio_context don't start new monitor commands. -+ */ -+ aio_co_schedule(iohandler_get_aio_context(), -+ qemu_coroutine_self()); -+ qemu_coroutine_yield(); -+ } - } else { - /* - * Actual context doesn't match the one the command needs. -@@ -232,7 +254,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ - .errp = &err, - .co = qemu_coroutine_self(), - }; -- aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh, -+ aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh, - &data); - qemu_coroutine_yield(); - } -diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out -index 329977d9b9..a37bf446e9 100644 ---- a/tests/qemu-iotests/060.out -+++ b/tests/qemu-iotests/060.out -@@ -421,8 +421,8 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}} - write failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Testing incoming inactive corrupted image === - -@@ -432,8 +432,8 @@ QMP_VERSION - qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1 index: 0); further non-fatal corruption events will be suppressed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}} - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - corrupt: false - *** done -diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out -index bca0c02f5c..a2923b05c2 100644 ---- a/tests/qemu-iotests/071.out -+++ b/tests/qemu-iotests/071.out -@@ -45,8 +45,8 @@ QMP_VERSION - {"return": {}} - read failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Testing blkverify on existing block device === -@@ -84,9 +84,9 @@ wrote 512/512 bytes at offset 0 - {"return": ""} - read failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - QEMU_PROG: Failed to flush the L2 table cache: Input/output error - QEMU_PROG: Failed to flush the refcount block cache: Input/output error -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out -index 615c083549..aba85ea564 100644 ---- a/tests/qemu-iotests/081.out -+++ b/tests/qemu-iotests/081.out -@@ -35,8 +35,8 @@ QMP_VERSION - read 10485760/10485760 bytes at offset 0 - 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - == using quorum rewrite corrupted mode == -@@ -67,8 +67,8 @@ QMP_VERSION - read 10485760/10485760 bytes at offset 0 - 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - -- checking that the image has been corrected -- - read 10485760/10485760 bytes at offset 0 -@@ -106,8 +106,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION -@@ -115,8 +115,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "Cannot add a child to a quorum in blkverify mode"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - == dynamically removing a child from a quorum == -@@ -125,31 +125,31 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}} - {"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", 
"data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out -index e1c23a6983..97b6d8036d 100644 ---- a/tests/qemu-iotests/087.out -+++ b/tests/qemu-iotests/087.out -@@ -7,8 +7,8 @@ Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Duplicate ID === -@@ -18,8 +18,8 @@ QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} - {"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === aio=native without O_DIRECT === -@@ -28,8 +28,8 @@ Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Encrypted image QCow === -@@ -40,8 +40,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported 
in system emulators"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Encrypted image LUKS === -@@ -52,8 +52,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Missing driver === -@@ -63,7 +63,7 @@ Testing: -S - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out -index b5401d788d..b9c876b394 100644 ---- a/tests/qemu-iotests/108.out -+++ b/tests/qemu-iotests/108.out -@@ -173,8 +173,8 @@ OK: Reftable is where we expect it - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} - {"return": {}} - { "execute": "quit" } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 -index e207a555f3..0fb580f9a5 100755 ---- a/tests/qemu-iotests/109 -+++ b/tests/qemu-iotests/109 -@@ -57,13 +57,13 @@ run_qemu() - _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src - _send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return" - -- _send_qemu_cmd $QEMU_HANDLE \ -+ capture_events="$qmp_event" _send_qemu_cmd $QEMU_HANDLE \ - 
"{'execute':'drive-mirror', 'arguments':{ - 'device': 'src', 'target': '$raw_img', $qmp_format - 'mode': 'existing', 'sync': 'full'}}" \ - "return" - -- _send_qemu_cmd $QEMU_HANDLE '' "$qmp_event" -+ capture_events="$qmp_event JOB_STATUS_CHANGE" _wait_event $QEMU_HANDLE "$qmp_event" - if test "$qmp_event" = BLOCK_JOB_ERROR; then - _send_qemu_cmd $QEMU_HANDLE '' '"status": "null"' - fi -diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out -index 965c9a6a0a..3ae8552ff7 100644 ---- a/tests/qemu-iotests/109.out -+++ b/tests/qemu-iotests/109.out -@@ -7,7 +7,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -35,12 +35,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -50,6 +48,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a qcow2 header into raw === -@@ -59,7 +58,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -75,8 +74,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -87,12 +86,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -102,6 +99,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a qed header into raw === -@@ -111,7 +109,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -127,8 +125,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -139,12 +137,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -154,6 +150,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vdi header into raw === -@@ -163,7 +160,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -179,8 +176,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -191,12 +188,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -206,6 +201,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vmdk header into raw === -@@ -215,7 +211,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -231,8 +227,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -243,12 +239,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -258,6 +252,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vpc header into raw === -@@ -267,7 +262,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -283,8 +278,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -295,12 +290,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -310,6 +303,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image empty.bochs into raw === -@@ -318,7 +312,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -334,8 +328,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -346,12 +340,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -361,6 +353,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === -@@ -369,7 +362,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -385,8 +378,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -397,12 +390,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -412,6 +403,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image parallels-v1 into raw === -@@ -420,7 +412,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -436,8 +428,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -448,12 +440,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -463,6 +453,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image simple-pattern.cloop into raw === -@@ -471,7 +462,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -487,8 +478,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -499,12 +490,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -514,6 +503,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Write legitimate MBR into raw === -@@ -522,7 +512,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -530,12 +520,10 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -545,6 +533,7 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": 
{"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - { 'execute': 'qmp_capabilities' } - {"return": {}} -@@ -554,12 +543,10 @@ Images are identical. - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -569,5 +556,6 @@ Images are identical. 
- {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - *** done -diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out -index 735ffd25c6..1cea9e0217 100644 ---- a/tests/qemu-iotests/117.out -+++ b/tests/qemu-iotests/117.out -@@ -18,8 +18,8 @@ wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - No errors were found on the image. 
- read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out -index 0744c1f136..35d84a5bc5 100644 ---- a/tests/qemu-iotests/120.out -+++ b/tests/qemu-iotests/120.out -@@ -5,8 +5,8 @@ QMP_VERSION - wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - read 65536/65536 bytes at offset 0 -diff --git a/tests/qemu-iotests/127.out b/tests/qemu-iotests/127.out -index 1685c4850a..dd8c4a8aa9 100644 ---- a/tests/qemu-iotests/127.out -+++ b/tests/qemu-iotests/127.out -@@ -28,6 +28,6 @@ wrote 42/42 bytes at offset 0 - { 'execute': 'quit' } - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out -index 312f76d5da..32866440ae 100644 ---- a/tests/qemu-iotests/140.out -+++ b/tests/qemu-iotests/140.out -@@ -19,6 +19,6 @@ read 65536/65536 bytes at offset 0 - qemu-io: can't open device nbd+unix:///drv?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'drv' not present - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} 
-+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out -index 9ec5888e0e..d6afa32abc 100644 ---- a/tests/qemu-iotests/143.out -+++ b/tests/qemu-iotests/143.out -@@ -10,6 +10,6 @@ server reported: export 'no_such_export' not present - qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'aa--aa...' not present - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out -index 4a22f0c41a..07e5e83f5d 100644 ---- a/tests/qemu-iotests/156.out -+++ b/tests/qemu-iotests/156.out -@@ -72,8 +72,8 @@ read 65536/65536 bytes at offset 196608 - {"return": ""} - - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out -index 9d09b60452..45e9153ef3 100644 ---- a/tests/qemu-iotests/176.out -+++ b/tests/qemu-iotests/176.out -@@ -169,8 +169,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -206,8 +206,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": 
false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.1 === - -@@ -218,8 +218,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -256,8 +256,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.2 === - -@@ -268,8 +268,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -306,8 +306,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.3 === - -@@ -318,8 +318,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -353,6 +353,6 @@ QMP_VERSION - {"return": {}} - 
{"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out -index 57f7265458..83fc1a4797 100644 ---- a/tests/qemu-iotests/182.out -+++ b/tests/qemu-iotests/182.out -@@ -53,6 +53,6 @@ Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 extended_l2= - {'execute': 'qmp_capabilities'} - {"return": {}} - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/183.out b/tests/qemu-iotests/183.out -index fd9c2e52a5..51aa41c888 100644 ---- a/tests/qemu-iotests/183.out -+++ b/tests/qemu-iotests/183.out -@@ -53,11 +53,11 @@ wrote 65536/65536 bytes at offset 1048576 - === Shut down and check image === - - {"execute":"quit"} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"return": {}} - {"execute":"quit"} --{"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - No errors were found on the image. - No errors were found on the image. 
- wrote 65536/65536 bytes at offset 1048576 -diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out -index 77e5489d65..e8f631f853 100644 ---- a/tests/qemu-iotests/184.out -+++ b/tests/qemu-iotests/184.out -@@ -89,10 +89,6 @@ Testing: - "return": [ - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -104,6 +100,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == property changes in ThrottleGroup == -@@ -169,10 +169,6 @@ Testing: - "iops-total-max": 0 - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -184,6 +180,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == object creation/set errors == -@@ -211,10 +211,6 @@ Testing: - "desc": "bps/iops/max total values and read/write values cannot be used at the same time" - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -226,6 +222,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == don't specify group == -@@ -247,10 +247,6 @@ Testing: - "desc": "Parameter 'throttle-group' is missing" - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -262,6 +258,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - *** done -diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 -index 2ae0a85bbf..17489fb91c 100755 ---- a/tests/qemu-iotests/185 -+++ b/tests/qemu-iotests/185 -@@ -344,14 +344,14 @@ wait_for_job_and_quit() { - - sleep 1 - -+ # List of expected events -+ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' -+ - _send_qemu_cmd $h \ - '{"execute": "quit"}' \ - 'return' - -- # List of expected events -- capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' - _wait_event $h 'SHUTDOWN' -- QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN - _wait_event $h 'JOB_STATUS_CHANGE' # standby - 
_wait_event $h 'JOB_STATUS_CHANGE' # ready - _wait_event $h 'JOB_STATUS_CHANGE' # standby -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index 7292c26bae..6af0953c4d 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -40,9 +40,16 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start active commit job and exit qemu === - -@@ -56,9 +63,16 @@ Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start mirror job and exit qemu === - -@@ -75,9 +89,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": 
TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start backup job and exit qemu === - -@@ -97,9 +118,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start streaming job and exit qemu === - -@@ -112,9 +140,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - No errors were found on the image. - - === Start mirror to throttled QSD and exit qemu === -diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out -index ea88777374..c3309e4bc6 100644 ---- a/tests/qemu-iotests/191.out -+++ b/tests/qemu-iotests/191.out -@@ -378,10 +378,6 @@ wrote 65536/65536 bytes at offset 1048576 - ] - } - { 'execute': 'quit' } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -393,6 +389,10 @@ wrote 65536/65536 bytes at offset 1048576 - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 64 MiB (67108864 bytes) -@@ -796,10 +796,6 @@ wrote 65536/65536 bytes at offset 1048576 - ] - } - { 'execute': 'quit' } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -811,6 +807,10 @@ wrote 65536/65536 bytes at offset 1048576 - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 64 MiB (67108864 bytes) -diff --git a/tests/qemu-iotests/195.out b/tests/qemu-iotests/195.out -index ec84df5012..91717d302e 100644 ---- a/tests/qemu-iotests/195.out -+++ b/tests/qemu-iotests/195.out -@@ -17,10 +17,6 @@ Testing: -drive 
if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid - "return": { - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -32,6 +28,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - image: TEST_DIR/t.IMGFMT.mid - file format: IMGFMT -@@ -55,10 +55,6 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top - "return": { - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -70,6 +66,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - image: TEST_DIR/t.IMGFMT - file format: IMGFMT -diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out -index e5e7f42caa..5f5b42e2dc 100644 ---- a/tests/qemu-iotests/223.out -+++ b/tests/qemu-iotests/223.out -@@ -11,8 +11,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Write part of the file under active bitmap === -@@ -145,14 +145,14 @@ read 2097152/2097152 bytes at offset 2097152 - - {"execute":"nbd-server-remove", - "arguments":{"name":"n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - 
{"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} - {"execute":"nbd-server-stop"} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} -@@ -267,14 +267,14 @@ read 2097152/2097152 bytes at offset 2097152 - - {"execute":"nbd-server-remove", - "arguments":{"name":"n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} - {"execute":"nbd-server-stop"} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} -@@ -282,8 +282,8 @@ read 2097152/2097152 bytes at offset 2097152 - {"execute":"nbd-server-stop"} - {"error": {"class": "GenericError", "desc": "NBD server not running"}} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Use qemu-nbd as server === - -diff --git a/tests/qemu-iotests/227.out b/tests/qemu-iotests/227.out -index a947b1a87d..d6a1d4ecb6 100644 ---- a/tests/qemu-iotests/227.out -+++ b/tests/qemu-iotests/227.out -@@ -54,10 +54,6 @@ Testing: -drive driver=null-co,read-zeroes=on,if=virtio - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -69,6 +65,10 @@ 
Testing: -drive driver=null-co,read-zeroes=on,if=virtio - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -drive if=none === -@@ -124,10 +124,6 @@ Testing: -drive driver=null-co,if=none - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -139,6 +135,10 @@ Testing: -drive driver=null-co,if=none - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -blockdev === -@@ -155,10 +155,6 @@ Testing: -blockdev driver=null-co,node-name=null - "return": [ - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -170,6 +166,10 @@ Testing: -blockdev driver=null-co,node-name=null - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -blockdev and -device === -@@ -226,10 +226,6 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -241,5 +237,9 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - *** done -diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out -index e909e83994..7d252e7fe4 100644 ---- a/tests/qemu-iotests/247.out -+++ b/tests/qemu-iotests/247.out -@@ -17,6 +17,6 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": 
{"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out -index 6a74a8138b..71843f02de 100644 ---- a/tests/qemu-iotests/273.out -+++ b/tests/qemu-iotests/273.out -@@ -282,10 +282,6 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev - ] - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -297,5 +293,9 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - *** done -diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 -index de12b2b1b9..ea81dc496a 100755 ---- a/tests/qemu-iotests/308 -+++ b/tests/qemu-iotests/308 -@@ -77,6 +77,7 @@ fuse_export_add() - # $1: Export ID - fuse_export_del() - { -+ capture_events="BLOCK_EXPORT_DELETED" \ - _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'block-export-del', - 'arguments': { -@@ -84,8 +85,7 @@ fuse_export_del() - } }" \ - 'return' - -- _send_qemu_cmd $QEMU_HANDLE \ -- '' \ -+ _wait_event $QEMU_HANDLE \ - 'BLOCK_EXPORT_DELETED' - } - -diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out -index d5767133b1..e5e233691d 100644 ---- a/tests/qemu-iotests/308.out -+++ b/tests/qemu-iotests/308.out -@@ -165,9 +165,9 @@ OK: Post-truncate image size is as expected - - === Tear down === - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export-mp"}} -+{"return": {}} - - === Compare copy with original === - Images are identical. 
-@@ -201,9 +201,9 @@ wrote 67108864/67108864 bytes at offset 0 - read 67108864/67108864 bytes at offset 0 - 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export"}} -+{"return": {}} - read 67108864/67108864 bytes at offset 0 - 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - *** done -diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error -index 88ee5f670c..fb8db73b31 100755 ---- a/tests/qemu-iotests/tests/file-io-error -+++ b/tests/qemu-iotests/tests/file-io-error -@@ -99,13 +99,12 @@ echo - $QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io - echo - --_send_qemu_cmd $QEMU_HANDLE \ -+capture_events=BLOCK_EXPORT_DELETED _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'block-export-del', - 'arguments': {'id': 'exp0'}}" \ - 'return' - --_send_qemu_cmd $QEMU_HANDLE \ -- '' \ -+_wait_event $QEMU_HANDLE \ - 'BLOCK_EXPORT_DELETED' - - _send_qemu_cmd $QEMU_HANDLE \ -diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out -index 2ca5a9d964..2967ac8f0d 100644 ---- a/tests/qemu-iotests/tests/iothreads-resize.out -+++ b/tests/qemu-iotests/tests/iothreads-resize.out -@@ -3,8 +3,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - QMP_VERSION - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 128 MiB (134217728 bytes) -diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out -index 
c1bc9b8356..aa6b6d1aef 100644 ---- a/tests/qemu-iotests/tests/qsd-jobs.out -+++ b/tests/qemu-iotests/tests/qsd-jobs.out -@@ -7,8 +7,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - QMP_VERSION - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} -+{"return": {}} - - === Streaming can't get permission on base node === - -@@ -17,6 +17,6 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} - {"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} -+{"return": {}} - *** done --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch new file mode 100644 index 0000000..f65d293 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch @@ -0,0 +1,101 @@ +From 6f60c86c5dd747ba68cb4a11084e7b021769e70b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 22 Aug 2024 09:35:29 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing + server + +RH-Author: Eric Blake +RH-MergeRequest: 266: nbd/server: CVE-2024-7409: Avoid use-after-free when closing server +RH-Jira: RHEL-52617 +RH-Acked-by: 
Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/1] e6e12c985cd13dd14336d98ab0719c789b5e914d (ebblake/centos-qemu-kvm) + +Commit 3e7ef738 plugged the use-after-free of the global nbd_server +object, but overlooked a use-after-free of nbd_server->listener. +Although this race is harder to hit, notice that our shutdown path +first drops the reference count of nbd_server->listener, then triggers +actions that can result in a pending client reaching the +nbd_blockdev_client_closed() callback, which in turn calls +qio_net_listener_set_client_func on a potentially stale object. + +If we know we don't want any more clients to connect, and have already +told the listener socket to shut down, then we should not be trying to +update the listener socket's associated function. + +Reproducer: + +> #!/usr/bin/python3 +> +> import os +> from threading import Thread +> +> def start_stop(): +> while 1: +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start", ++"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'') +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'') +> +> def nbd_list(): +> while 1: +> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock') +> +> def test(): +> sst = Thread(target=start_stop) +> sst.start() +> nlt = Thread(target=nbd_list) +> nlt.start() +> +> sst.join() +> nlt.join() +> +> test() + +Fixes: CVE-2024-7409 +Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop") +CC: qemu-stable@nongnu.org +Reported-by: Andrey Drobyshev +Signed-off-by: Eric Blake +Message-ID: <20240822143617.800419-2-eblake@redhat.com> +Reviewed-by: Stefan Hajnoczi + +(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index f73409ae49..b36f41b7c5 
100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + static void nbd_update_server_watch(NBDServerData *s) + { +- if (!s->max_connections || s->connections < s->max_connections) { +- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL); +- } else { +- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ if (s->listener) { ++ if (!s->max_connections || s->connections < s->max_connections) { ++ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, ++ NULL); ++ } else { ++ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ } + } + } + +@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server) + */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ server->listener = NULL; + QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { + qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch index 7098ffc..5459a51 100644 --- a/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -1,17 +1,17 @@ -From 3311ddaffa78ad8d2c40c86cd69461ebeabe1052 Mon Sep 17 00:00:00 2001 +From f76d73f62555ad73081558c1f56bcb832fbb8c35 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 6 Aug 2024 13:53:00 -0500 -Subject: [PATCH 3/5] nbd/server: CVE-2024-7409: Cap default max-connections to - 100 +Subject: [PATCH 098/100] nbd/server: CVE-2024-7409: Cap default + max-connections to 100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] -RH-Jira: 
RHEL-52616 +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] 17c9c81282e180485e2f2d584a3e0c73a6fbd66a (ebblake/qemu-kvm) +RH-Commit: [2/4] 1fb3b8cd9781a66bba2f4a6bee2b320e96de86aa (redhat/centos-stream/src/qemu-kvm) Allowing an unlimited number of clients to any web service is a recipe for a rudimentary denial of service attack: the client merely needs to @@ -90,7 +90,7 @@ break corner-case back-compat behavior without a deprecation period] Signed-off-by: Eric Blake (cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623) -Jira: https://issues.redhat.com/browse/RHEL-52616 +Jira: https://issues.redhat.com/browse/RHEL-52617 Signed-off-by: Eric Blake --- block/monitor/block-hmp-cmds.c | 3 ++- @@ -100,7 +100,7 @@ Signed-off-by: Eric Blake 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index bdbb5cb141..2d7b4d80c8 100644 +index d954bec6f1..bdf2eb50b6 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) @@ -158,7 +158,7 @@ index 1d4d65922d..d4f8b21aec 100644 typedef struct NBDOption { diff --git a/qapi/block-export.json b/qapi/block-export.json -index 7874a49ba7..1d255d77e3 100644 +index 3919a2d5b9..f45e4fd481 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -28,7 +28,7 @@ @@ -177,8 +177,8 @@ index 7874a49ba7..1d255d77e3 100644 -# default: 0). +# default: 100). # - # Returns: error if the server is already running. 
- # + # Errors: + # - if the server is already running -- 2.39.3 diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch index 834eae3..2ba16e5 100644 --- a/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -1,17 +1,17 @@ -From 1c2fe81349a99cf0c28549426c7ad0c06d942ae3 Mon Sep 17 00:00:00 2001 +From 6522c68268f00c9c5665f8f98cf6ed1984124cf3 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 7 Aug 2024 12:23:13 -0500 -Subject: [PATCH 5/5] nbd/server: CVE-2024-7409: Close stray clients at +Subject: [PATCH 100/100] nbd/server: CVE-2024-7409: Close stray clients at server-stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] -RH-Jira: RHEL-52616 +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] 54d56c0d92d92b2b6a0f81a01853881286beae0e (ebblake/qemu-kvm) +RH-Commit: [4/4] c00bb5a7e73446e9f071ef83e4f1576f73a17059 (redhat/centos-stream/src/qemu-kvm) A malicious client can attempt to connect to an NBD server, and then intentionally delay progress in the handshake, including if it does @@ -76,7 +76,7 @@ Message-ID: <20240807174943.771624-14-eblake@redhat.com> Reviewed-by: Daniel P. 
Berrangé (cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3) -Jira: https://issues.redhat.com/browse/RHEL-52616 +Jira: https://issues.redhat.com/browse/RHEL-52617 Signed-off-by: Eric Blake --- blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch index 0b2261e..e1755c2 100644 --- a/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -1,16 +1,17 @@ -From 71c525e45f3f2a421ad651bc50eff94be925b36c Mon Sep 17 00:00:00 2001 +From ca30846351f1136d15f55717a5534ad927f7cf52 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 8 Aug 2024 16:05:08 -0500 -Subject: [PATCH 4/5] nbd/server: CVE-2024-7409: Drop non-negotiating clients +Subject: [PATCH 099/100] nbd/server: CVE-2024-7409: Drop non-negotiating + clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] -RH-Jira: RHEL-52616 +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] 4394e60a8733cd57ee6c8cb140171c7f61cc13a3 (ebblake/qemu-kvm) +RH-Commit: [3/4] 8008a1067766951d9752bcc41c2127a07fce934d (redhat/centos-stream/src/qemu-kvm) A client that opens a socket but does not negotiate is merely hogging qemu's resources (an open fd and a small amount of memory); and a @@ -56,7 +57,7 @@ Reviewed-by: Daniel P. 
Berrangé Signed-off-by: Eric Blake (cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0) -Jira: https://issues.redhat.com/browse/RHEL-52616 +Jira: https://issues.redhat.com/browse/RHEL-52617 Signed-off-by: Eric Blake --- nbd/server.c | 28 +++++++++++++++++++++++++++- diff --git a/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch b/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch deleted file mode 100644 index 7298992..0000000 --- a/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch +++ /dev/null @@ -1,95 +0,0 @@ -From dc4dd11233522d8195782a2196aaae44bd924575 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 14 Mar 2024 17:58:24 +0100 -Subject: [PATCH 2/4] nbd/server: Fix race in draining the export - -RH-Author: Kevin Wolf -RH-MergeRequest: 231: Fix deadlock and crash during storage migration -RH-Jira: RHEL-28125 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] 036e1b171986099904dee468c59d77437e3d3d61 (kmwolf/centos-qemu-kvm) - -When draining an NBD export, nbd_drained_begin() first sets -client->quiescing so that nbd_client_receive_next_request() won't start -any new request coroutines. Then nbd_drained_poll() tries to makes sure -that we wait for any existing request coroutines by checking that -client->nb_requests has become 0. - -However, there is a small window between creating a new request -coroutine and increasing client->nb_requests. If a coroutine is in this -state, it won't be waited for and drain returns too early. - -In the context of switching to a different AioContext, this means that -blk_aio_attached() will see client->recv_coroutine != NULL and fail its -assertion. - -Fix this by increasing client->nb_requests immediately when starting the -coroutine. Doing this after the checks if we should create a new -coroutine is okay because client->lock is held. 
- -Cc: qemu-stable@nongnu.org -Fixes: fd6afc501a01 ("nbd/server: Use drained block ops to quiesce the server") -Signed-off-by: Kevin Wolf -Message-ID: <20240314165825.40261-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 9c707525cbb1dd1e56876e45c70c0c08f2876d41) -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 15 +++++++-------- - 1 file changed, 7 insertions(+), 8 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 941832f178..c3484cc1eb 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -3007,8 +3007,8 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, - /* Owns a reference to the NBDClient passed as opaque. */ - static coroutine_fn void nbd_trip(void *opaque) - { -- NBDClient *client = opaque; -- NBDRequestData *req = NULL; -+ NBDRequestData *req = opaque; -+ NBDClient *client = req->client; - NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ - int ret; - Error *local_err = NULL; -@@ -3037,8 +3037,6 @@ static coroutine_fn void nbd_trip(void *opaque) - goto done; - } - -- req = nbd_request_get(client); -- - /* - * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has - * set client->quiescing but by the time we get back nbd_drained_end() may -@@ -3112,9 +3110,7 @@ static coroutine_fn void nbd_trip(void *opaque) - } - - done: -- if (req) { -- nbd_request_put(req); -- } -+ nbd_request_put(req); - - qemu_mutex_unlock(&client->lock); - -@@ -3143,10 +3139,13 @@ disconnect: - */ - static void nbd_client_receive_next_request(NBDClient *client) - { -+ NBDRequestData *req; -+ - if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && - !client->quiescing) { - nbd_client_get(client); -- client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); -+ req = nbd_request_get(client); -+ client->recv_coroutine = qemu_coroutine_create(nbd_trip, req); - aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); - } - } --- -2.39.3 - diff --git 
a/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch index 28b0064..7614df8 100644 --- a/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch +++ b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch @@ -1,14 +1,14 @@ -From d1dd79b558fb9b23ae14165ec8edf0085e927091 Mon Sep 17 00:00:00 2001 +From af6f51ad3482513e3ac047eb203f9dc623d47088 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 8 Apr 2024 11:00:44 -0500 -Subject: [PATCH 2/4] nbd/server: Mark negotiation functions as coroutine_fn +Subject: [PATCH 2/2] nbd/server: Mark negotiation functions as coroutine_fn RH-Author: Eric Blake -RH-MergeRequest: 375: Fix regression on nbd+tls -RH-Jira: RHEL-33754 -RH-Acked-by: Kevin Wolf +RH-MergeRequest: 239: avoid destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 +RH-Acked-by: Stefan Hajnoczi RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] 6dc8ecca16df5ae5cc6dc36e7b96f991396fcf24 (ebblake/qemu-kvm) +RH-Commit: [2/2] acf61b854f80365993d24bcd1d110ed3518b22fa (ebblake/centos-qemu-kvm) nbd_negotiate() is already marked coroutine_fn. 
And given the fix in the previous patch to have nbd_negotiate_handle_starttls not create @@ -23,8 +23,9 @@ Message-ID: <20240408160214.1200629-6-eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy [eblake: drop one spurious coroutine_fn marking] Signed-off-by: Eric Blake + +Jira: https://issues.redhat.com/browse/RHEL-33440 (cherry picked from commit 4fa333e08dd96395a99ea8dd9e4c73a29dd23344) -Jira: https://issues.redhat.com/browse/RHEL-33754 Signed-off-by: Eric Blake --- nbd/server.c | 102 +++++++++++++++++++++++++++++---------------------- diff --git a/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch index 85a9986..6b4c670 100644 --- a/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +++ b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -1,16 +1,16 @@ -From c8c7dc7a445892a820223be2e7617a790e0400f5 Mon Sep 17 00:00:00 2001 +From 70acef52a99e5114699f5fa58de5f0b5c031b880 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 7 Aug 2024 08:50:01 -0500 -Subject: [PATCH 2/5] nbd/server: Plumb in new args to nbd_client_add() +Subject: [PATCH 097/100] nbd/server: Plumb in new args to nbd_client_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] -RH-Jira: RHEL-52616 +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 658d80c80a1163fcad456a34ec20d7828273a36c (ebblake/qemu-kvm) +RH-Commit: [1/4] 7614e294e1f5b7861386950ae994bea166d19950 (redhat/centos-stream/src/qemu-kvm) Upcoming patches to fix a CVE need to track an opaque pointer passed in by the owner of a client object, as well as request for a time @@ -28,7 +28,7 @@ Reviewed-by: Daniel P. 
Berrangé Signed-off-by: Eric Blake (cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac) -Jira: https://issues.redhat.com/browse/RHEL-52616 +Jira: https://issues.redhat.com/browse/RHEL-52617 Signed-off-by: Eric Blake --- blockdev-nbd.c | 6 ++++-- @@ -156,10 +156,10 @@ index 892797bb11..e50012499f 100644 + return client->owner; +} diff --git a/qemu-nbd.c b/qemu-nbd.c -index bac0b5e3ec..01b4a63c31 100644 +index d7b3ccab21..48e2fa5858 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c -@@ -389,7 +389,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, +@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, nb_fds++; nbd_update_server_watch(); diff --git a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch deleted file mode 100644 index 339e234..0000000 --- a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch +++ /dev/null @@ -1,53 +0,0 @@ -From cd7788a857a6099206c4063e3ef69cb9e4aebcbc Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:50 -0500 -Subject: [PATCH 070/101] nbd/server: avoid per-NBDRequest nbd_client_get/put() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/26] 5acb090ac4adf4260cd9e9c5605a27012b2a33aa (kmwolf/centos-qemu-kvm) - -nbd_trip() processes a single NBD request from start to finish and holds -an NBDClient reference throughout. NBDRequest does not outlive the scope -of nbd_trip(). Therefore it is unnecessary to ref/unref NBDClient for -each NBDRequest. - -Removing these nbd_client_get()/nbd_client_put() calls will make -thread-safety easier in the commits that follow. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-ID: <20231221192452.1785567-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 895cf0a752..0b09ccc8dc 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1557,7 +1557,6 @@ static NBDRequestData *nbd_request_get(NBDClient *client) - client->nb_requests++; - - req = g_new0(NBDRequestData, 1); -- nbd_client_get(client); - req->client = client; - return req; - } -@@ -1578,8 +1577,6 @@ static void nbd_request_put(NBDRequestData *req) - } - - nbd_client_receive_next_request(client); -- -- nbd_client_put(client); - } - - static void blk_aio_attached(AioContext *ctx, void *opaque) --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch index 8693400..9f65e99 100644 --- a/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch +++ b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch @@ -1,14 +1,14 @@ -From ef01aeba9f6c4c886719261333a04bc32484f0d0 Mon Sep 17 00:00:00 2001 +From ae9ebfc4ebd6f146951f04cf9e12eeaaf4c2387e Mon Sep 17 00:00:00 2001 From: Zhu Yangyang Date: Mon, 8 Apr 2024 11:00:43 -0500 -Subject: [PATCH 1/4] nbd/server: do not poll within a coroutine context +Subject: [PATCH 1/2] nbd/server: do not poll within a coroutine context RH-Author: Eric Blake -RH-MergeRequest: 375: Fix regression on nbd+tls -RH-Jira: RHEL-33754 -RH-Acked-by: Kevin Wolf +RH-MergeRequest: 239: avoid destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 +RH-Acked-by: Stefan Hajnoczi RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 88ca2c90e0c412ba8c1cfd7ea91e01ec58220e9b (ebblake/qemu-kvm) +RH-Commit: [1/2] 27963e92aacca8ed43994113b645f472fba8f8bc (ebblake/centos-qemu-kvm) Coroutines are not supposed to block. Instead, they should yield. 
@@ -28,8 +28,9 @@ Signed-off-by: Zhu Yangyang Signed-off-by: Eric Blake Message-ID: <20240408160214.1200629-5-eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy + +Jira: https://issues.redhat.com/browse/RHEL-33440 (cherry picked from commit ae6d91a7e9b77abb029ed3fa9fad461422286942) -Jira: https://issues.redhat.com/browse/RHEL-33754 Signed-off-by: Eric Blake --- nbd/client.c | 28 ++++++++++++++++++++++++---- diff --git a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch deleted file mode 100644 index e0d763d..0000000 --- a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch +++ /dev/null @@ -1,373 +0,0 @@ -From bb0a6afff7f23a3ddb460dc1b2e70c06565f8a3f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:52 -0500 -Subject: [PATCH 072/101] nbd/server: introduce NBDClient->lock to protect - fields - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/26] 49b64adaaf8b1c30f339d1ecc8ea89fb9db63f1c (kmwolf/centos-qemu-kvm) - -NBDClient has a number of fields that are accessed by both the export -AioContext and the main loop thread. When the AioContext lock is removed -these fields will need another form of protection. - -Add NBDClient->lock and protect fields that are accessed by both -threads. Also add assertions where possible and otherwise add doc -comments stating assumptions about which thread and lock holding. - -Note this patch moves the client->recv_coroutine assertion from -nbd_co_receive_request() to nbd_trip() where client->lock is held. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231221192452.1785567-7-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 144 +++++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 111 insertions(+), 33 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index e91e2e0903..941832f178 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -125,23 +125,25 @@ struct NBDClient { - int refcount; /* atomic */ - void (*close_fn)(NBDClient *client, bool negotiated); - -+ QemuMutex lock; -+ - NBDExport *exp; - QCryptoTLSCreds *tlscreds; - char *tlsauthz; - QIOChannelSocket *sioc; /* The underlying data channel */ - QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ - -- Coroutine *recv_coroutine; -+ Coroutine *recv_coroutine; /* protected by lock */ - - CoMutex send_lock; - Coroutine *send_coroutine; - -- bool read_yielding; -- bool quiescing; -+ bool read_yielding; /* protected by lock */ -+ bool quiescing; /* protected by lock */ - - QTAILQ_ENTRY(NBDClient) next; -- int nb_requests; -- bool closing; -+ int nb_requests; /* protected by lock */ -+ bool closing; /* protected by lock */ - - uint32_t check_align; /* If non-zero, check for aligned client requests */ - -@@ -1415,11 +1417,18 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) - - len = qio_channel_readv(client->ioc, &iov, 1, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { -- client->read_yielding = true; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->read_yielding = true; -+ -+ /* Prompt main loop thread to re-run nbd_drained_poll() */ -+ aio_wait_kick(); -+ } - qio_channel_yield(client->ioc, G_IO_IN); -- client->read_yielding = false; -- if (client->quiescing) { -- return -EAGAIN; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->read_yielding = false; -+ if (client->quiescing) { -+ return -EAGAIN; -+ } - } - continue; - } else if (len < 0) { -@@ -1528,6 +1537,7 @@ void nbd_client_put(NBDClient 
*client) - blk_exp_unref(&client->exp->common); - } - g_free(client->contexts.bitmaps); -+ qemu_mutex_destroy(&client->lock); - g_free(client); - } - } -@@ -1561,11 +1571,13 @@ static void client_close(NBDClient *client, bool negotiated) - { - assert(qemu_in_main_thread()); - -- if (client->closing) { -- return; -- } -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ if (client->closing) { -+ return; -+ } - -- client->closing = true; -+ client->closing = true; -+ } - - /* Force requests to finish. They will drop their own references, - * then we'll close the socket and free the NBDClient. -@@ -1579,6 +1591,7 @@ static void client_close(NBDClient *client, bool negotiated) - } - } - -+/* Runs in export AioContext with client->lock held */ - static NBDRequestData *nbd_request_get(NBDClient *client) - { - NBDRequestData *req; -@@ -1591,6 +1604,7 @@ static NBDRequestData *nbd_request_get(NBDClient *client) - return req; - } - -+/* Runs in export AioContext with client->lock held */ - static void nbd_request_put(NBDRequestData *req) - { - NBDClient *client = req->client; -@@ -1614,14 +1628,18 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - trace_nbd_blk_aio_attached(exp->name, ctx); - - exp->common.ctx = ctx; - - QTAILQ_FOREACH(client, &exp->clients, next) { -- assert(client->nb_requests == 0); -- assert(client->recv_coroutine == NULL); -- assert(client->send_coroutine == NULL); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ assert(client->nb_requests == 0); -+ assert(client->recv_coroutine == NULL); -+ assert(client->send_coroutine == NULL); -+ } - } - } - -@@ -1629,6 +1647,8 @@ static void blk_aio_detach(void *opaque) - { - NBDExport *exp = opaque; - -+ assert(qemu_in_main_thread()); -+ - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); - - exp->common.ctx = NULL; -@@ -1639,8 +1659,12 @@ static void nbd_drained_begin(void *opaque) - NBDExport *exp = opaque; - NBDClient 
*client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- client->quiescing = true; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->quiescing = true; -+ } - } - } - -@@ -1649,28 +1673,48 @@ static void nbd_drained_end(void *opaque) - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- client->quiescing = false; -- nbd_client_receive_next_request(client); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->quiescing = false; -+ nbd_client_receive_next_request(client); -+ } - } - } - -+/* Runs in export AioContext */ -+static void nbd_wake_read_bh(void *opaque) -+{ -+ NBDClient *client = opaque; -+ qio_channel_wake_read(client->ioc); -+} -+ - static bool nbd_drained_poll(void *opaque) - { - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- if (client->nb_requests != 0) { -- /* -- * If there's a coroutine waiting for a request on nbd_read_eof() -- * enter it here so we don't depend on the client to wake it up. -- */ -- if (client->recv_coroutine != NULL && client->read_yielding) { -- qio_channel_wake_read(client->ioc); -- } -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ if (client->nb_requests != 0) { -+ /* -+ * If there's a coroutine waiting for a request on nbd_read_eof() -+ * enter it here so we don't depend on the client to wake it up. -+ * -+ * Schedule a BH in the export AioContext to avoid missing the -+ * wake up due to the race between qio_channel_wake_read() and -+ * qio_channel_yield(). 
-+ */ -+ if (client->recv_coroutine != NULL && client->read_yielding) { -+ aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp), -+ nbd_wake_read_bh, client); -+ } - -- return true; -+ return true; -+ } - } - } - -@@ -1681,6 +1725,8 @@ static void nbd_eject_notifier(Notifier *n, void *data) - { - NBDExport *exp = container_of(n, NBDExport, eject_notifier); - -+ assert(qemu_in_main_thread()); -+ - blk_exp_request_shutdown(&exp->common); - } - -@@ -2566,7 +2612,6 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, - int ret; - - g_assert(qemu_in_coroutine()); -- assert(client->recv_coroutine == qemu_coroutine_self()); - ret = nbd_receive_request(client, request, errp); - if (ret < 0) { - return ret; -@@ -2975,6 +3020,9 @@ static coroutine_fn void nbd_trip(void *opaque) - */ - - trace_nbd_trip(); -+ -+ qemu_mutex_lock(&client->lock); -+ - if (client->closing) { - goto done; - } -@@ -2990,7 +3038,21 @@ static coroutine_fn void nbd_trip(void *opaque) - } - - req = nbd_request_get(client); -- ret = nbd_co_receive_request(req, &request, &local_err); -+ -+ /* -+ * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has -+ * set client->quiescing but by the time we get back nbd_drained_end() may -+ * have already cleared client->quiescing. In that case we try again -+ * because nothing else will spawn an nbd_trip() coroutine until we set -+ * client->recv_coroutine = NULL further down. 
-+ */ -+ do { -+ assert(client->recv_coroutine == qemu_coroutine_self()); -+ qemu_mutex_unlock(&client->lock); -+ ret = nbd_co_receive_request(req, &request, &local_err); -+ qemu_mutex_lock(&client->lock); -+ } while (ret == -EAGAIN && !client->quiescing); -+ - client->recv_coroutine = NULL; - - if (client->closing) { -@@ -3002,15 +3064,16 @@ static coroutine_fn void nbd_trip(void *opaque) - } - - if (ret == -EAGAIN) { -- assert(client->quiescing); - goto done; - } - - nbd_client_receive_next_request(client); -+ - if (ret == -EIO) { - goto disconnect; - } - -+ qemu_mutex_unlock(&client->lock); - qio_channel_set_cork(client->ioc, true); - - if (ret < 0) { -@@ -3030,6 +3093,10 @@ static coroutine_fn void nbd_trip(void *opaque) - g_free(request.contexts->bitmaps); - g_free(request.contexts); - } -+ -+ qio_channel_set_cork(client->ioc, false); -+ qemu_mutex_lock(&client->lock); -+ - if (ret < 0) { - error_prepend(&local_err, "Failed to send reply: "); - goto disconnect; -@@ -3044,11 +3111,13 @@ static coroutine_fn void nbd_trip(void *opaque) - goto disconnect; - } - -- qio_channel_set_cork(client->ioc, false); - done: - if (req) { - nbd_request_put(req); - } -+ -+ qemu_mutex_unlock(&client->lock); -+ - if (!nbd_client_put_nonzero(client)) { - aio_co_reschedule_self(qemu_get_aio_context()); - nbd_client_put(client); -@@ -3059,13 +3128,19 @@ disconnect: - if (local_err) { - error_reportf_err(local_err, "Disconnect client, due to: "); - } -+ - nbd_request_put(req); -+ qemu_mutex_unlock(&client->lock); - - aio_co_reschedule_self(qemu_get_aio_context()); - client_close(client, true); - nbd_client_put(client); - } - -+/* -+ * Runs in export AioContext and main loop thread. Caller must hold -+ * client->lock. 
-+ */ - static void nbd_client_receive_next_request(NBDClient *client) - { - if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && -@@ -3091,7 +3166,9 @@ static coroutine_fn void nbd_co_client_start(void *opaque) - return; - } - -- nbd_client_receive_next_request(client); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ nbd_client_receive_next_request(client); -+ } - } - - /* -@@ -3108,6 +3185,7 @@ void nbd_client_new(QIOChannelSocket *sioc, - Coroutine *co; - - client = g_new0(NBDClient, 1); -+ qemu_mutex_init(&client->lock); - client->refcount = 1; - client->tlscreds = tlscreds; - if (tlscreds) { --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch deleted file mode 100644 index 3ca11a9..0000000 --- a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 8b60d72532b6511b41d82d591fb4f509314ef15f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:51 -0500 -Subject: [PATCH 071/101] nbd/server: only traverse NBDExport->clients from - main loop thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/26] e7794a3a5c363c7508ee505c4ba03d9ef8862ca9 (kmwolf/centos-qemu-kvm) - -The NBD clients list is currently accessed from both the export -AioContext and the main loop thread. When the AioContext lock is removed -there will be nothing protecting the clients list. - -Adding a lock around the clients list is tricky because NBDClient -structs are refcounted and may be freed from the export AioContext or -the main loop thread. nbd_export_request_shutdown() -> client_close() -> -nbd_client_put() is also tricky because the list lock would be held -while indirectly dropping references to NDBClients. 
- -A simpler approach is to only allow nbd_client_put() and client_close() -calls from the main loop thread. Then the NBD clients list is only -accessed from the main loop thread and no fancy locking is needed. - -nbd_trip() just needs to reschedule itself in the main loop AioContext -before calling nbd_client_put() and client_close(). This costs more CPU -cycles per NBD request so add nbd_client_put_nonzero() to optimize the -common case where more references to NBDClient remain. - -Note that nbd_client_get() can still be called from either thread, so -make NBDClient->refcount atomic. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231221192452.1785567-6-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 61 +++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 51 insertions(+), 10 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 0b09ccc8dc..e91e2e0903 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -122,7 +122,7 @@ struct NBDMetaContexts { - }; - - struct NBDClient { -- int refcount; -+ int refcount; /* atomic */ - void (*close_fn)(NBDClient *client, bool negotiated); - - NBDExport *exp; -@@ -1501,14 +1501,17 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque - - #define MAX_NBD_REQUESTS 16 - -+/* Runs in export AioContext and main loop thread */ - void nbd_client_get(NBDClient *client) - { -- client->refcount++; -+ qatomic_inc(&client->refcount); - } - - void nbd_client_put(NBDClient *client) - { -- if (--client->refcount == 0) { -+ assert(qemu_in_main_thread()); -+ -+ if (qatomic_fetch_dec(&client->refcount) == 1) { - /* The last reference should be dropped by client->close, - * which is called by client_close. - */ -@@ -1529,8 +1532,35 @@ void nbd_client_put(NBDClient *client) - } - } - -+/* -+ * Tries to release the reference to @client, but only if other references -+ * remain. 
This is an optimization for the common case where we want to avoid -+ * the expense of scheduling nbd_client_put() in the main loop thread. -+ * -+ * Returns true upon success or false if the reference was not released because -+ * it is the last reference. -+ */ -+static bool nbd_client_put_nonzero(NBDClient *client) -+{ -+ int old = qatomic_read(&client->refcount); -+ int expected; -+ -+ do { -+ if (old == 1) { -+ return false; -+ } -+ -+ expected = old; -+ old = qatomic_cmpxchg(&client->refcount, expected, expected - 1); -+ } while (old != expected); -+ -+ return true; -+} -+ - static void client_close(NBDClient *client, bool negotiated) - { -+ assert(qemu_in_main_thread()); -+ - if (client->closing) { - return; - } -@@ -2933,15 +2963,20 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, - static coroutine_fn void nbd_trip(void *opaque) - { - NBDClient *client = opaque; -- NBDRequestData *req; -+ NBDRequestData *req = NULL; - NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ - int ret; - Error *local_err = NULL; - -+ /* -+ * Note that nbd_client_put() and client_close() must be called from the -+ * main loop thread. Use aio_co_reschedule_self() to switch AioContext -+ * before calling these functions. -+ */ -+ - trace_nbd_trip(); - if (client->closing) { -- nbd_client_put(client); -- return; -+ goto done; - } - - if (client->quiescing) { -@@ -2949,10 +2984,9 @@ static coroutine_fn void nbd_trip(void *opaque) - * We're switching between AIO contexts. Don't attempt to receive a new - * request and kick the main context which may be waiting for us. 
- */ -- nbd_client_put(client); - client->recv_coroutine = NULL; - aio_wait_kick(); -- return; -+ goto done; - } - - req = nbd_request_get(client); -@@ -3012,8 +3046,13 @@ static coroutine_fn void nbd_trip(void *opaque) - - qio_channel_set_cork(client->ioc, false); - done: -- nbd_request_put(req); -- nbd_client_put(client); -+ if (req) { -+ nbd_request_put(req); -+ } -+ if (!nbd_client_put_nonzero(client)) { -+ aio_co_reschedule_self(qemu_get_aio_context()); -+ nbd_client_put(client); -+ } - return; - - disconnect: -@@ -3021,6 +3060,8 @@ disconnect: - error_reportf_err(local_err, "Disconnect client, due to: "); - } - nbd_request_put(req); -+ -+ aio_co_reschedule_self(qemu_get_aio_context()); - client_close(client, true); - nbd_client_put(client); - } --- -2.39.3 - diff --git a/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch b/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch deleted file mode 100644 index f37f65f..0000000 --- a/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 9ca64f73238d9f1b9f13d8e941ba42771a992afb Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 29 Dec 2023 14:06:05 +0100 -Subject: [PATCH 20/20] pc/q35: set SMBIOS entry point type to 'auto' by - default - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [18/18] c7fc6ac7350bca3ff99e58620710a86218385781 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - Use smbios-entry-point-type='auto' for newer machine types as a workaround - for Windows not detecting SMBIOS tables. 
Which makes QEMU pick SMBIOS tables - based on configuration (with 2.x preferred and fallback to 3.x if the former - isn't compatible with configuration) - - Default compat setting of smbios-entry-point-type after series - for pc/q35 machines: - * 9.0-newer: 'auto' - * 8.1-8.2: '64' - * 8.0-older: '32' - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2008 - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - Tested-by: Fiona Ebner - -Conflicts: hw/i386/pc_piix.c hw/i386/pc_q35.c - due to RHEL machine types - -REHL only parts: - Fix RHEL 'pc' machine types at SMBIOS 2.X - for the latest RHEL 'q35' machine type use version autoselect - which propagates to RHEL 9.4 q35 macine type while RHEL 9.2 q35 and older - are kept at SMBIOS_ENTRY_POINT_TYPE_32 (see: pc_q35_machine_rhel920_options) - -Signed-off-by: Igor Mammedov ---- - hw/i386/pc.c | 2 +- - hw/i386/pc_piix.c | 7 +++++++ - hw/i386/pc_q35.c | 5 +++++ - 3 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ae6777fc1a..d6f267b220 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2004,7 +2004,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - mc->nvdimm_supported = true; - mc->smp_props.dies_supported = true; - mc->default_ram_id = "pc.ram"; -- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; -+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; - - object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", - pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 7344b35cf1..54d1c58bce 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -539,9 +539,14 @@ static void pc_i440fx_machine_options(MachineClass *m) - - static void pc_i440fx_8_2_machine_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ - pc_i440fx_machine_options(m); - m->alias = "pc"; - m->is_default = true; -+ -+ /* For pc-i44fx-8.2 and 8.1, 
use SMBIOS 3.X by default */ -+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; - } - - DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, -@@ -982,6 +987,8 @@ static void pc_machine_rhel7_options(MachineClass *m) - m->alias = "pc"; - m->is_default = 1; - m->smp_props.prefer_sockets = true; -+ /* there aren't ne PC macine types in RHEL9, keep it at SMBIOS 2.X */ -+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; - } - - static void pc_init_rhel760(MachineState *machine) -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 9a22ff5dd6..cd5fb7380e 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -377,8 +377,11 @@ static void pc_q35_machine_options(MachineClass *m) - - static void pc_q35_8_2_machine_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_options(m); - m->alias = "q35"; -+ /* For pc-q35-8.2 and 8.1, use SMBIOS 3.X by default */ -+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; - } - - DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, -@@ -712,6 +715,8 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - m->alias = "q35"; - m->max_cpus = 710; - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); -+ /* use SMBIOS version autoselect by default for the latest RHEL machine */ -+ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; - } - - static void pc_q35_init_rhel940(MachineState *machine) --- -2.39.3 - diff --git a/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch b/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch deleted file mode 100644 index 9d3fd29..0000000 --- a/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 03cad16743d9a4d377af66611d030eca9eda326d Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 19 Mar 2024 11:42:04 +0100 -Subject: [PATCH 4/4] pc: smbios: fixup manufacturer/product/version to match - downstream - 
-RH-Author: Igor Mammedov -RH-MergeRequest: 232: pc: smbios: fixup manufacturer/product/version to match downstream -RH-Jira: RHEL-21705 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Ani Sinha -RH-Commit: [1/1] 9235f64acb5ff46360282e889d0aedcb13374ac1 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -commit [1] discarded RHEL only change that customizes -SMBIOS values for manufacturer/product/version for pc/q35 -machine types. -Fix it up by reverting back to ("Red Hat", "KVM", mc->desc) -tuple. - -1) -Fixes: 208239eb2 (hw/i386/pc: Defer smbios_set_defaults() to machine_done) -Signed-off-by: Igor Mammedov ---- - hw/i386/fw_cfg.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index 58429bb78d..d6a24177e2 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", mc->desc, mc->name, -+ smbios_set_defaults("Red Hat", "KVM", mc->desc, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version); --- -2.39.3 - diff --git a/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch new file mode 100644 index 0000000..2c21dde --- /dev/null +++ b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch @@ -0,0 +1,68 @@ +From c0a65c752cd83dea27cbeb34074d65fb2c5a6b59 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:13 -0500 +Subject: [PATCH 008/100] pci-host/q35: Move PAM initialization above SMRAM + initialization + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [8/91] 22a9221d4726e872aa0f0dc25ae9d823c0611547 (bonzini/rhel-qemu-kvm) 
+ +In mch_realize(), process PAM initialization before SMRAM initialization so +that later patch can skill all the SMRAM related with a single check. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-18-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae) +Signed-off-by: Paolo Bonzini +--- + hw/pci-host/q35.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 0d7d4e3f08..98d4a7c253 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) + /* setup pci memory mapping */ + pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + ++ /* PAM */ ++ init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_BIOS_BASE, PAM_BIOS_SIZE); ++ for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { ++ init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +@@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) + + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&mch->smram)); +- +- init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_BIOS_BASE, PAM_BIOS_SIZE); +- for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { +- init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); +- } + } + + uint64_t 
mch_mcfg_base(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch new file mode 100644 index 0000000..66e0423 --- /dev/null +++ b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch @@ -0,0 +1,83 @@ +From c70f6e7e3461e6562c0591079cc71068bf0f2ed8 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:07 -0500 +Subject: [PATCH 033/100] physmem: Introduce + ram_block_discard_guest_memfd_range() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [33/91] b6169fa8d752d83977b18897be24f6ab9f3d3472 (bonzini/rhel-qemu-kvm) + +When memory page is converted from private to shared, the original +private memory is back'ed by guest_memfd. Introduce +ram_block_discard_guest_memfd_range() for discarding memory in +guest_memfd. + +Based on a patch by Isaku Yamahata . 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-12-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335) +Signed-off-by: Paolo Bonzini +--- + include/exec/cpu-common.h | 2 ++ + system/physmem.c | 23 +++++++++++++++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 6346df17ce..6d5318895a 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); + + int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); + int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length); + + #endif + +diff --git a/system/physmem.c b/system/physmem.c +index 5ebcf5be11..c3d04ca921 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3721,6 +3721,29 @@ err: + return ret; + } + ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length) ++{ ++ int ret = -1; ++ ++#ifdef CONFIG_FALLOCATE_PUNCH_HOLE ++ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ++ start, length); ++ ++ if (ret) { ++ ret = -errno; ++ error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++ } ++#else ++ ret = -ENOSYS; ++ error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++#endif ++ ++ return ret; ++} ++ + bool ramblock_is_pmem(RAMBlock *rb) + { + return rb->flags & RAM_PMEM; +-- +2.39.3 + diff --git a/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch new file mode 100644 index 0000000..037442c --- /dev/null +++ 
b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch @@ -0,0 +1,140 @@ +From cfb109b393e019398a52f66a5ff0e9581c841335 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:37 -0500 +Subject: [PATCH 013/100] ppc/pef: switch to use + confidential_guest_kvm_init/reset() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [13/91] e25c498fc79a4c8e22ca41d9cbd06e40b4cf1f11 (bonzini/rhel-qemu-kvm) + +Use the unified interface to call confidential guest related kvm_init() +and kvm_reset(), to avoid exposing pef specific functions. + +As a bonus, pef.h goes away since there is no direct call from sPAPR +board code to PEF code anymore. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6) +Signed-off-by: Paolo Bonzini +--- + hw/ppc/pef.c | 9 ++++++--- + hw/ppc/spapr.c | 10 +++++++--- + include/hw/ppc/pef.h | 17 ----------------- + 3 files changed, 13 insertions(+), 23 deletions(-) + delete mode 100644 include/hw/ppc/pef.h + +diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c +index d28ed3ba73..47553348b1 100644 +--- a/hw/ppc/pef.c ++++ b/hw/ppc/pef.c +@@ -15,7 +15,6 @@ + #include "sysemu/kvm.h" + #include "migration/blocker.h" + #include "exec/confidential-guest-support.h" +-#include "hw/ppc/pef.h" + + #define TYPE_PEF_GUEST "pef-guest" + OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) +@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) + #endif + } + +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return kvmppc_svm_init(cgs, errp); + } + +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error 
**errp) ++static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + + static void pef_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = pef_kvm_init; ++ klass->kvm_reset = pef_kvm_reset; + } + + static void pef_guest_init(Object *obj) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a258d81846..6f6f0fd790 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -75,6 +75,7 @@ + #include "hw/virtio/vhost-scsi-common.h" + + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/usb.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" +@@ -87,7 +88,6 @@ + #include "hw/ppc/spapr_tpm_proxy.h" + #include "hw/ppc/spapr_nvdimm.h" + #include "hw/ppc/spapr_numa.h" +-#include "hw/ppc/pef.h" + + #include "monitor/monitor.h" + +@@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + +- pef_kvm_reset(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_reset(machine->cgs, &error_fatal); ++ } + spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { +@@ -2844,7 +2846,9 @@ static void spapr_machine_init(MachineState *machine) + /* + * if Secure VM (PEF) support is configured, then initialize it + */ +- pef_kvm_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + msi_nonbroken = true; + +diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h +deleted file mode 100644 +index 707dbe524c..0000000000 +--- a/include/hw/ppc/pef.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* +- * PEF (Protected Execution Facility) for POWER support +- * +- * Copyright Red Hat. 
+- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. +- * +- */ +- +-#ifndef HW_PPC_PEF_H +-#define HW_PPC_PEF_H +- +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); +- +-#endif /* HW_PPC_PEF_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch new file mode 100644 index 0000000..c7f121b --- /dev/null +++ b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch @@ -0,0 +1,164 @@ +From 83bb32c25472b500738a54ac8f2ad0f5c496acf1 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:14 -0500 +Subject: [PATCH 009/100] q35: Introduce smm_ranges property for q35-pci-host + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [9/91] 931156772bfc2085e7241eecc56cf6eca3dac1fd (bonzini/rhel-qemu-kvm) + +Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, +etc... exist for the target platform. TDX doesn't support SMM and doesn't +play nice with QEMU modifying related guest memory ranges. 
+ +Signed-off-by: Isaku Yamahata +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-19-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_q35.c | 2 ++ + hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ + include/hw/i386/pc.h | 1 + + include/hw/pci-host/q35.h | 1 + + 4 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9adcdadce8..dedc86eec9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine) + x86ms->above_4g_mem_size, NULL); + object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); ++ object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, ++ x86_machine_is_smm_enabled(x86ms), NULL); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + /* pci */ +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 98d4a7c253..0b6cbaed7e 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -179,6 +179,8 @@ static Property q35_host_props[] = { + mch.below_4g_mem_size, 0), + DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, + mch.above_4g_mem_size, 0), ++ DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, ++ mch.has_smm_ranges, true), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), + DEFINE_PROP_END_OF_LIST(), + }; +@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) + /* mch's object_initialize resets the default value, set it again */ + qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, + Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); ++ + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", + q35_host_get_pci_hole_start, + NULL, NULL, NULL); +@@ -476,6 +479,10 @@ static void 
mch_write_config(PCIDevice *d, + mch_update_pciexbar(mch); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, + MCH_HOST_BRIDGE_SMRAM_SIZE)) { + mch_update_smram(mch); +@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, + static void mch_update(MCHPCIState *mch) + { + mch_update_pciexbar(mch); ++ + mch_update_pam(mch); +- mch_update_smram(mch); +- mch_update_ext_tseg_mbytes(mch); +- mch_update_smbase_smram(mch); ++ if (mch->has_smm_ranges) { ++ mch_update_smram(mch); ++ mch_update_ext_tseg_mbytes(mch); ++ mch_update_smbase_smram(mch); ++ } + + /* + * pci hole goes from end-of-low-ram to io-apic. +@@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) + pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, + MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); + +- d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; +- d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; +- d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; +- d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; ++ if (mch->has_smm_ranges) { ++ d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; ++ d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; ++ d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; ++ d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + +- if (mch->ext_tseg_mbytes > 0) { +- pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, +- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); +- } ++ if (mch->ext_tseg_mbytes > 0) { ++ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, ++ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); ++ } + +- d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; +- d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; ++ d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ } + + mch_update(mch); + } +@@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) + 
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 87420783ab..467e7fb52f 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -164,6 +164,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" + #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" ++#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, +diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h +index bafcbe6752..22fadfa3ed 100644 +--- a/include/hw/pci-host/q35.h ++++ b/include/hw/pci-host/q35.h +@@ -50,6 +50,7 @@ struct MCHPCIState { + MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; ++ bool has_smm_ranges; + Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch index a92c7f5..00cdae0 100644 --- a/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch +++ b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch @@ -1,44 +1,36 @@ -From 6e39b4c13c0eacb35e81874b09e6b6411266c631 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 5 Jun 2024 19:56:51 -0400 +From 97739ee2ed20856fe395248d399dfc7f9ab4f33d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 11 Apr 2024 15:06:01 +0200 Subject: [PATCH 1/4] qcow2: Don't open data_file with BDRV_O_NO_IO -RH-Author: Jon Maloy -RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) 
-RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 RH-CVE: CVE-2024-4467 RH-Acked-by: Kevin Wolf RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/4] 1000359b05c706f3c5155a9481692352be333129 +RH-Acked-by: Eric Blake +RH-Commit: [1/4] f9843ce5c519901654a7d8ba43ee95ce25ca13c2 -commit f9843ce5c519901654a7d8ba43ee95ce25ca13c2 -Author: Kevin Wolf -Date: Thu Apr 11 15:06:01 2024 +0200 +One use case for 'qemu-img info' is verifying that untrusted images +don't reference an unwanted external file, be it as a backing file or an +external data file. To make sure that calling 'qemu-img info' can't +already have undesired side effects with a malicious image, just don't +open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do +I/O, we don't need to have it open. - qcow2: Don't open data_file with BDRV_O_NO_IO +This changes the output of iotests case 061, which used 'qemu-img info' +to show that opening an image with an invalid data file fails. After +this patch, it succeeds. Replace this part of the test with a qemu-io +call, but keep the final 'qemu-img info' to show that the invalid data +file is correctly displayed in the output. - One use case for 'qemu-img info' is verifying that untrusted images - don't reference an unwanted external file, be it as a backing file or an - external data file. To make sure that calling 'qemu-img info' can't - already have undesired side effects with a malicious image, just don't - open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do - I/O, we don't need to have it open. - - This changes the output of iotests case 061, which used 'qemu-img info' - to show that opening an image with an invalid data file fails. After - this patch, it succeeds. 
Replace this part of the test with a qemu-io - call, but keep the final 'qemu-img info' to show that the invalid data - file is correctly displayed in the output. - - Signed-off-by: Kevin Wolf - Reviewed-by: Eric Blake - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Hanna Czenczek - Upstream: N/A, embargoed - Signed-off-by: Hanna Czenczek - -Signed-off-by: Jon Maloy +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek --- block/qcow2.c | 17 ++++++++++++++++- tests/qemu-iotests/061 | 6 ++++-- @@ -46,7 +38,7 @@ Signed-off-by: Jon Maloy 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c -index d91b7b91d3..b269cfc78f 100644 +index 0ebd455dc8..a4cffb628c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1642,7 +1642,22 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch deleted file mode 100644 index b31142e..0000000 --- a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch +++ /dev/null @@ -1,167 +0,0 @@ -From f1e82fe5076b4030d385dfa49b8284899386114d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:54 -0500 -Subject: [PATCH 08/22] qdev: add IOThreadVirtQueueMappingList property type - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [4/17] 817aa1339da8ed3814730473342ba045e66d5b51 (stefanha/centos-stream-qemu-kvm) - -virtio-blk and virtio-scsi devices will need a way to specify the -mapping between IOThreads and virtqueues. At the moment all virtqueues -are assigned to a single IOThread or the main loop. 
This single thread -can be a CPU bottleneck, so it is necessary to allow finer-grained -assignment to spread the load. - -Introduce DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST() so devices can take a -parameter that maps virtqueues to IOThreads. The command-line syntax for -this new property is as follows: - - --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1,2]},...]}' - -IOThreads are specified by name and virtqueues are specified by 0-based -index. - -It will be common to simply assign virtqueues round-robin across a set -of IOThreads. A convenient syntax that does not require specifying -individual virtqueue indices is available: - - --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"},...]}' - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit cf03a152c5d749fd0083bfe540df9524f1d2ff1d) -Signed-off-by: Stefan Hajnoczi ---- - hw/core/qdev-properties-system.c | 46 +++++++++++++++++++++++++++++ - include/hw/qdev-properties-system.h | 5 ++++ - qapi/virtio.json | 29 ++++++++++++++++++ - 3 files changed, 80 insertions(+) - -diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index 73cced4626..1a396521d5 100644 ---- a/hw/core/qdev-properties-system.c -+++ b/hw/core/qdev-properties-system.c -@@ -18,6 +18,7 @@ - #include "qapi/qapi-types-block.h" - #include "qapi/qapi-types-machine.h" - #include "qapi/qapi-types-migration.h" -+#include "qapi/qapi-visit-virtio.h" - #include "qapi/qmp/qerror.h" - #include "qemu/ctype.h" - #include "qemu/cutils.h" -@@ -1160,3 +1161,48 @@ const PropertyInfo qdev_prop_cpus390entitlement = { - .set = qdev_propinfo_set_enum, - .set_default_value = qdev_propinfo_set_default_value_enum, - }; -+ -+/* --- IOThreadVirtQueueMappingList --- */ -+ -+static void get_iothread_vq_mapping_list(Object *obj, Visitor *v, -+ const char 
*name, void *opaque, Error **errp) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); -+ -+ visit_type_IOThreadVirtQueueMappingList(v, name, prop_ptr, errp); -+} -+ -+static void set_iothread_vq_mapping_list(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); -+ IOThreadVirtQueueMappingList *list; -+ -+ if (!visit_type_IOThreadVirtQueueMappingList(v, name, &list, errp)) { -+ return; -+ } -+ -+ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); -+ *prop_ptr = list; -+} -+ -+static void release_iothread_vq_mapping_list(Object *obj, -+ const char *name, void *opaque) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); -+ -+ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); -+ *prop_ptr = NULL; -+} -+ -+const PropertyInfo qdev_prop_iothread_vq_mapping_list = { -+ .name = "IOThreadVirtQueueMappingList", -+ .description = "IOThread virtqueue mapping list [{\"iothread\":\"\", " -+ "\"vqs\":[1,2,3,...]},...]", -+ .get = get_iothread_vq_mapping_list, -+ .set = set_iothread_vq_mapping_list, -+ .release = release_iothread_vq_mapping_list, -+}; -diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h -index 91f7a2452d..06c359c190 100644 ---- a/include/hw/qdev-properties-system.h -+++ b/include/hw/qdev-properties-system.h -@@ -24,6 +24,7 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; - extern const PropertyInfo qdev_prop_pcie_link_speed; - extern const PropertyInfo qdev_prop_pcie_link_width; - extern const PropertyInfo qdev_prop_cpus390entitlement; -+extern const PropertyInfo qdev_prop_iothread_vq_mapping_list; - - #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ - DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) -@@ -82,4 +83,8 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; - DEFINE_PROP_SIGNED(_n, _s, _f, _d, 
qdev_prop_cpus390entitlement, \ - CpuS390Entitlement) - -+#define DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST(_name, _state, _field) \ -+ DEFINE_PROP(_name, _state, _field, qdev_prop_iothread_vq_mapping_list, \ -+ IOThreadVirtQueueMappingList *) -+ - #endif -diff --git a/qapi/virtio.json b/qapi/virtio.json -index e6dcee7b83..19c7c36e36 100644 ---- a/qapi/virtio.json -+++ b/qapi/virtio.json -@@ -928,3 +928,32 @@ - 'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' }, - 'returns': 'VirtioQueueElement', - 'features': [ 'unstable' ] } -+ -+## -+# @IOThreadVirtQueueMapping: -+# -+# Describes the subset of virtqueues assigned to an IOThread. -+# -+# @iothread: the id of IOThread object -+# -+# @vqs: an optional array of virtqueue indices that will be handled by this -+# IOThread. When absent, virtqueues are assigned round-robin across all -+# IOThreadVirtQueueMappings provided. Either all IOThreadVirtQueueMappings -+# must have @vqs or none of them must have it. -+# -+# Since: 9.0 -+## -+ -+{ 'struct': 'IOThreadVirtQueueMapping', -+ 'data': { 'iothread': 'str', '*vqs': ['uint16'] } } -+ -+## -+# @DummyVirtioForceArrays: -+# -+# Not used by QMP; hack to let us use IOThreadVirtQueueMappingList internally -+# -+# Since: 9.0 -+## -+ -+{ 'struct': 'DummyVirtioForceArrays', -+ 'data': { 'unused-iothread-vq-mapping': ['IOThreadVirtQueueMapping'] } } --- -2.39.3 - diff --git a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch deleted file mode 100644 index 94bb716..0000000 --- a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 4251aab5b2beb68d1800cd4a329361ff6f57c430 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:52 -0500 -Subject: [PATCH 07/22] qdev-properties: alias all object class properties - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: 
RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [3/17] bc5d0aafe4645dacf9277904a2b20760d6e676e1 (stefanha/centos-stream-qemu-kvm) - -qdev_alias_all_properties() aliases a DeviceState's qdev properties onto -an Object. This is used for VirtioPCIProxy types so that --device -virtio-blk-pci has properties of its embedded --device virtio-blk-device -object. - -Currently this function is implemented using qdev properties. Change the -function to use QOM object class properties instead. This works because -qdev properties create QOM object class properties, but it also catches -any QOM object class-only properties that have no qdev properties. - -This change ensures that properties of devices are shown with --device -foo,\? even if they are QOM object class properties. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 350147a871a545ab56b4a1062c8485635d9ffc24) -Signed-off-by: Stefan Hajnoczi ---- - hw/core/qdev-properties.c | 18 ++++++++++-------- - include/hw/qdev-properties.h | 4 ++-- - 2 files changed, 12 insertions(+), 10 deletions(-) - -diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c -index 840006e953..7d6fa726fd 100644 ---- a/hw/core/qdev-properties.c -+++ b/hw/core/qdev-properties.c -@@ -1076,16 +1076,18 @@ void device_class_set_props(DeviceClass *dc, Property *props) - void qdev_alias_all_properties(DeviceState *target, Object *source) - { - ObjectClass *class; -- Property *prop; -+ ObjectPropertyIterator iter; -+ ObjectProperty *prop; - - class = object_get_class(OBJECT(target)); -- do { -- DeviceClass *dc = DEVICE_CLASS(class); - -- for (prop = dc->props_; prop && prop->name; prop++) { -- object_property_add_alias(source, prop->name, -- OBJECT(target), prop->name); -+ object_class_property_iter_init(&iter, class); -+ while ((prop = object_property_iter_next(&iter))) { -+ if 
(object_property_find(source, prop->name)) { -+ continue; /* skip duplicate properties */ - } -- class = object_class_get_parent(class); -- } while (class != object_class_by_name(TYPE_DEVICE)); -+ -+ object_property_add_alias(source, prop->name, -+ OBJECT(target), prop->name); -+ } - } -diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h -index 25743a29a0..09aa04ca1e 100644 ---- a/include/hw/qdev-properties.h -+++ b/include/hw/qdev-properties.h -@@ -230,8 +230,8 @@ void qdev_property_add_static(DeviceState *dev, Property *prop); - * @target: Device which has properties to be aliased - * @source: Object to add alias properties to - * -- * Add alias properties to the @source object for all qdev properties on -- * the @target DeviceState. -+ * Add alias properties to the @source object for all properties on the @target -+ * DeviceState. - * - * This is useful when @target is an internal implementation object - * owned by @source, and you want to expose all the properties of that --- -2.39.3 - diff --git a/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch new file mode 100644 index 0000000..967adc1 --- /dev/null +++ b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch @@ -0,0 +1,40 @@ +From 97e10c2ccc8dc29019d6d22de1d23c55fea0f6c4 Mon Sep 17 00:00:00 2001 +From: Dehan Meng +Date: Wed, 21 Aug 2024 14:55:01 +0800 +Subject: [PATCH] qemu-guest-agent: Update the logfile path of + qga-fsfreeze-hook.log + +RH-Author: 6-dehan +RH-MergeRequest: 265: qemu-guest-agent: Update the logfile path of qga-fsfreeze-hook.log +RH-Jira: RHEL-52250 +RH-Acked-by: Konstantin Kostiuk +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 7c5cfb882dbc277becb7daa2c5d6b8eff3d601b2 (6-dehan/src_centosupstream_qemu-kvm) + +selinux context 'system_u:object_r:virt_qemu_ga_log_t:s0', it +should be changed to '/var/log/qemu-ga/qga-fsfreeze-hook.log'. 
And +it's worth to mention that this is RHEL-only change for matching +existing SELinux boolean and policy. + +Jira: https://issues.redhat.com/browse/RHEL-52250 +Signed-off-by: Dehan Meng +--- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index e9b84ec028..70536ba3e3 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -7,7 +7,7 @@ + # "freeze" argument before the filesystem is frozen. And for fsfreeze-thaw + # request, it is issued with "thaw" argument after filesystem is thawed. + +-LOGFILE=/var/log/qga-fsfreeze-hook.log ++LOGFILE=/var/log/qemu-ga/qga-fsfreeze-hook.log + FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch b/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch deleted file mode 100644 index 7dc550c..0000000 --- a/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch +++ /dev/null @@ -1,135 +0,0 @@ -From f2fe6c7a2def488633cbb67e28ac00279d6e8de4 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 27 Feb 2024 11:17:39 +0100 -Subject: [PATCH 1/2] qemu_init: increase NOFILE soft limit on POSIX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cornelia Huck -RH-MergeRequest: 226: qemu_init: increase NOFILE soft limit on POSIX -RH-Jira: RHEL-26049 -RH-Acked-by: Gavin Shan -RH-Acked-by: Ani Sinha -RH-Acked-by: Shaoqin Huang -RH-Commit: [1/1] cee5404aef3f6437d45a1c43bdee73a57a528bee (cohuck/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-26049 - -In many configurations, e.g. multiple vNICs with multiple queues or -with many Ceph OSDs, the default soft limit of 1024 is not enough. 
-QEMU is supposed to work fine with file descriptors >= 1024 and does -not use select() on POSIX. Bump the soft limit to the allowed hard -limit to avoid issues with the aforementioned configurations. - -Of course the limit could be raised from the outside, but the man page -of systemd.exec states about 'LimitNOFILE=': - -> Don't use. -> [...] -> Typically applications should increase their soft limit to the hard -> limit on their own, if they are OK with working with file -> descriptors above 1023, - -If the soft limit is already the same as the hard limit, avoid the -superfluous setrlimit call. This can avoid a warning with a strict -seccomp filter blocking setrlimit if NOFILE was already raised before -executing QEMU. - -Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507 -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Fiona Ebner -Signed-off-by: Daniel P. Berrangé -(cherry picked from commit 03e471c41d8b1b6eb16c9714f387449f52fe5c1d) -Signed-off-by: Cornelia Huck ---- - include/sysemu/os-posix.h | 1 + - include/sysemu/os-win32.h | 5 +++++ - os-posix.c | 22 ++++++++++++++++++++++ - system/vl.c | 2 ++ - 4 files changed, 30 insertions(+) - -diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h -index dff32ae185..b881ac6c6f 100644 ---- a/include/sysemu/os-posix.h -+++ b/include/sysemu/os-posix.h -@@ -51,6 +51,7 @@ bool is_daemonized(void); - void os_daemonize(void); - bool os_set_runas(const char *user_id); - void os_set_chroot(const char *path); -+void os_setup_limits(void); - void os_setup_post(void); - int os_mlock(void); - -diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h -index 1047d260cb..b82a5d3ad9 100644 ---- a/include/sysemu/os-win32.h -+++ b/include/sysemu/os-win32.h -@@ -128,6 +128,11 @@ static inline int os_mlock(void) - return -ENOSYS; - } - -+static inline void os_setup_limits(void) -+{ -+ return; -+} -+ - #define fsync _commit - - #if !defined(lseek) -diff --git a/os-posix.c b/os-posix.c -index 
52ef6990ff..a4284e2c07 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -24,6 +24,7 @@ - */ - - #include "qemu/osdep.h" -+#include - #include - #include - #include -@@ -256,6 +257,27 @@ void os_daemonize(void) - } - } - -+void os_setup_limits(void) -+{ -+ struct rlimit nofile; -+ -+ if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) { -+ warn_report("unable to query NOFILE limit: %s", strerror(errno)); -+ return; -+ } -+ -+ if (nofile.rlim_cur == nofile.rlim_max) { -+ return; -+ } -+ -+ nofile.rlim_cur = nofile.rlim_max; -+ -+ if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) { -+ warn_report("unable to set NOFILE limit: %s", strerror(errno)); -+ return; -+ } -+} -+ - void os_setup_post(void) - { - int fd = 0; -diff --git a/system/vl.c b/system/vl.c -index 93635ffc5b..6443b6e469 100644 ---- a/system/vl.c -+++ b/system/vl.c -@@ -2783,6 +2783,8 @@ void qemu_init(int argc, char **argv) - error_init(argv[0]); - qemu_init_exec_dir(argv[0]); - -+ os_setup_limits(); -+ - qemu_init_arch_modules(); - - qemu_init_subsystems(); --- -2.39.3 - diff --git a/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch index f033486..c60570e 100644 --- a/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch +++ b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch @@ -1,17 +1,17 @@ -From 69d6b5f98d665fbcb86e42df40bcc5e9c79b397f Mon Sep 17 00:00:00 2001 +From fef17d3466deba9b4d581604de1c64c210d1a454 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 17 May 2024 21:50:14 -0500 -Subject: [PATCH 3/4] qio: Inherit follow_coroutine_ctx across TLS +Subject: [PATCH 1/4] qio: Inherit follow_coroutine_ctx across TLS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Eric Blake -RH-MergeRequest: 375: Fix regression on nbd+tls -RH-Jira: RHEL-33754 +RH-MergeRequest: 244: qio: Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 RH-Acked-by: Kevin Wolf -RH-Acked-by: 
Miroslav Rezanina -RH-Commit: [3/4] 14ddea7e3c81898bb3fe4be51a40749d67a5c0c0 (ebblake/qemu-kvm) +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 447b144638a1f032fa036838f5f1839628389fe5 (ebblake/centos-qemu-kvm) Since qemu 8.2, the combination of NBD + TLS + iothread crashes on an assertion failure: @@ -41,7 +41,7 @@ Reviewed-by: Stefan Hajnoczi Reviewed-by: Daniel P. Berrangé Message-ID: <20240518025246.791593-5-eblake@redhat.com> (cherry picked from commit 199e84de1c903ba5aa1f7256310bbc4a20dd930b) -Jira: https://issues.redhat.com/browse/RHEL-33754 +Jira: https://issues.redhat.com/browse/RHEL-33440 Signed-off-by: Eric Blake --- io/channel-tls.c | 26 +++++++++++++++----------- @@ -49,7 +49,7 @@ Signed-off-by: Eric Blake 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/io/channel-tls.c b/io/channel-tls.c -index 58fe1aceee..a8ad89c3d1 100644 +index 1d9c9c72bf..67b9700006 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -69,37 +69,40 @@ qio_channel_tls_new_server(QIOChannel *master, diff --git a/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch index 667e80b..c6948ae 100644 --- a/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch +++ b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch @@ -1,19 +1,17 @@ -From aa0ad36a56cbc0528306e47112081b0db124e512 Mon Sep 17 00:00:00 2001 +From 97334815c4a7cf0911b30c1366bbe67e883c57dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 5 Jun 2024 10:28:20 +0400 -Subject: [PATCH 3/4] rhel 9.4.0 machine type compat for virtio-gpu migration +Subject: [PATCH 4/4] rhel 9.4.0 machine type compat for virtio-gpu migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Marc-André Lureau -RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load -RH-Jira: RHEL-36181 +RH-MergeRequest: 246: virtio-gpu: 
fix v2 migration +RH-Jira: RHEL-34621 RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 270e932e04eb2d730550f3a56b53cbb17f5e66f8 - -Jira: https://issues.redhat.com/browse/RHEL-36181 +RH-Acked-by: Thomas Huth +RH-Commit: [2/2] 2a895d510453e83bb45dfb39b7751bcc2f309cc5 (marcandre.lureau-rh/qemu-kvm-centos) Signed-off-by: Marc-André Lureau --- @@ -21,17 +19,17 @@ Signed-off-by: Marc-André Lureau 1 file changed, 2 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c -index a0843ab93e..c8c460c916 100644 +index cf1d7faaaf..92609aae27 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -81,6 +81,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { - { "igb", "x-pcie-flr-init", "off" }, - /* hw_compat_rhel_9_4 jira RHEL-24045 */ - { "virtio-mem", "dynamic-memslots", "off" }, -+ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ +@@ -310,6 +310,8 @@ GlobalProperty hw_compat_rhel_9_5[] = { + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + /* hw_compat_rhel_9_5 from hw_compat_8_2 */ + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ + { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, }; - const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); -- 2.39.3 diff --git a/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch new file mode 100644 index 0000000..4f757bf --- /dev/null +++ b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch @@ -0,0 +1,67 @@ +From 4c93bec108f7e3918a2ef91b51cec477ade38cc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 17:45:56 -0400 +Subject: [PATCH 018/100] runstate: skip initial CPU reset if reset is not + actually possible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 
245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [18/91] ced267fdaadbf2072c1897223522457a006e6c81 (bonzini/rhel-qemu-kvm) + +Right now, the system reset is concluded by a call to +cpu_synchronize_all_post_reset() in order to sync any changes +that the machine reset callback applied to the CPU state. + +However, for VMs with encrypted state such as SEV-ES guests (currently +the only case of guests with non-resettable CPUs) this cannot be done, +because guest state has already been finalized by machine-init-done notifiers. +cpu_synchronize_all_post_reset() does nothing on these guests, and actually +we would like to make it fail if called once guest has been encrypted. +So, assume that boards that support non-resettable CPUs do not touch +CPU state and that all such setup is done before, at the time of +cpu_synchronize_all_post_init(). + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539) +Signed-off-by: Paolo Bonzini +--- + system/runstate.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/system/runstate.c b/system/runstate.c +index d6ab860eca..cb4905a40f 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason) + default: + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } +- cpu_synchronize_all_post_reset(); ++ ++ /* ++ * Some boards use the machine reset callback to point CPUs to the firmware ++ * entry point. Assume that this is not the case for boards that support ++ * non-resettable CPUs (currently used only for confidential guests), in ++ * which case cpu_synchronize_all_post_init() is enough because ++ * it does _more_ than cpu_synchronize_all_post_reset(). 
++ */ ++ if (cpus_are_resettable()) { ++ cpu_synchronize_all_post_reset(); ++ } else { ++ assert(runstate_check(RUN_STATE_PRELAUNCH)); ++ } ++ + vm_set_suspended(false); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..bc6e4e3 --- /dev/null +++ b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,109 @@ +From 4ebc58d4a7a3d4a20f20f1cd3f21082b80097fe2 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:38 -0500 +Subject: [PATCH 014/100] s390: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [14/91] 8c9e09ec9976c00b41c02868fff034286a341468 (bonzini/rhel-qemu-kvm) + +Use unified confidential_guest_kvm_init() for consistency with +other architectures. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a14a2b0148e657cc526b7a75f2a1937628764e7a) +Signed-off-by: Paolo Bonzini +--- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 10 +++++++++- + target/s390x/kvm/pv.h | 14 -------------- + 3 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9ad54682c6..828ce6e87e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/s390x/s390-virtio-hcall.h" + #include "hw/s390x/sclp.h" + #include "hw/s390x/s390_flic.h" +@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine) + s390_init_cpus(machine); + + /* Need CPU model to be determined before we can set up PV */ +- s390_pv_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + s390_flic_init(); + +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 7ca7faec73..dde836d21a 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) + return s390_pv_check_cpus(errp); + } + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { + return 0; + } + ++ if (!kvm_enabled()) { ++ error_setg(errp, "Protected Virtualization requires KVM"); ++ return -1; ++ } ++ + if (!s390_has_feat(S390_FEAT_UNPACK)) { + error_setg(errp, + "CPU model does not support Protected Virtualization"); +@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, + + static void 
s390_pv_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = s390_pv_kvm_init; + } + + static void s390_pv_guest_init(Object *obj) +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 5877d28ff1..4b40817439 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, + static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- if (!cgs) { +- return 0; +- } +- if (kvm_enabled()) { +- return s390_pv_kvm_init(cgs, errp); +- } +- +- error_setg(errp, "Protected Virtualization requires KVM"); +- return -1; +-} +- + #endif /* HW_S390_PV_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch deleted file mode 100644 index 1b4a4ab..0000000 --- a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 64b0180f5a52668f8ac4c444ba369231dbc4d5b9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 096/101] s390x/pci: avoid double enable/disable of aif -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [1/3] ebdf8a474ea21486f5ec051683f17bae6d20f675 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - -commit 07b2c8e034d80ff92e202405c494d2ff80fcf848 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:49 2024 
-0500 - - s390x/pci: avoid double enable/disable of aif - - Use a flag to keep track of whether AIF is currently enabled. This can be - used to avoid enabling/disabling AIF multiple times as well as to determine - whether or not it should be disabled during reset processing. - - Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices") - Reported-by: Cédric Le Goater - Reviewed-by: Eric Farman - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com> - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++-- - include/hw/s390x/s390-pci-bus.h | 1 + - 2 files changed, 24 insertions(+), 2 deletions(-) - -diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c -index ff41e4106d..1ee510436c 100644 ---- a/hw/s390x/s390-pci-kvm.c -+++ b/hw/s390x/s390-pci-kvm.c -@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void) - - int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) - { -+ int rc; - struct kvm_s390_zpci_op args = { - .fh = pbdev->fh, - .op = KVM_S390_ZPCIOP_REG_AEN, -@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) - .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST - }; - -- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (pbdev->aif) { -+ return -EINVAL; -+ } -+ -+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (rc == 0) { -+ pbdev->aif = true; -+ } -+ -+ return rc; - } - - int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) - { -+ int rc; -+ - struct kvm_s390_zpci_op args = { - .fh = pbdev->fh, - .op = KVM_S390_ZPCIOP_DEREG_AEN - }; - -- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (!pbdev->aif) { -+ return -EINVAL; -+ } -+ -+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (rc == 0) { -+ pbdev->aif = false; -+ } -+ -+ return rc; - } -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index b1bdbeaeb5..435e788867 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -361,6 +361,7 @@ struct S390PCIBusDevice { - bool unplug_requested; - bool interp; - bool forwarding_assist; -+ bool aif; - QTAILQ_ENTRY(S390PCIBusDevice) link; - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch deleted file mode 100644 index f3a4129..0000000 --- a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch +++ /dev/null @@ -1,137 +0,0 @@ -From c885b17e09ab19a3e8d3b2e1765963811af6f764 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 098/101] s390x/pci: drive ISM reset from subsystem reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [3/3] 426cf156a2c67e6dcd7483a769fa3741e2700504 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - 
-commit 68c691ca99a2538d6a53a70ce8a9ce06ee307ff1 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:51 2024 -0500 - - s390x/pci: drive ISM reset from subsystem reset - - ISM devices are sensitive to manipulation of the IOMMU, so the ISM device - needs to be reset before the vfio-pci device is reset (triggering a full - UNMAP). In order to ensure this occurs, trigger ISM device resets from - subsystem_reset before triggering the PCI bus reset (which will also - trigger vfio-pci reset). This only needs to be done for ISM devices - which were enabled for use by the guest. - Further, ensure that AIF is disabled as part of the reset event. - - Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot") - Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") - Reported-by: Cédric Le Goater - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com> - Reviewed-by: Eric Farman - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++--------- - hw/s390x/s390-virtio-ccw.c | 8 ++++++++ - include/hw/s390x/s390-pci-bus.h | 1 + - 3 files changed, 26 insertions(+), 9 deletions(-) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 347580ebac..3e57d5faca 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -151,20 +151,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) - pci_device_reset(pbdev->pdev); - } - --static void s390_pci_reset_cb(void *opaque) --{ -- S390PCIBusDevice *pbdev = opaque; -- -- pci_device_reset(pbdev->pdev); --} -- - static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) - { - HotplugHandler *hotplug_ctrl; - - if (pbdev->pft == ZPCI_PFT_ISM) { - notifier_remove(&pbdev->shutdown_notifier); -- qemu_unregister_reset(s390_pci_reset_cb, pbdev); - } - - /* Unplug the PCI device */ -@@ -1132,7 +1124,6 @@ static void 
s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - if (pbdev->pft == ZPCI_PFT_ISM) { - pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; - qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); -- qemu_register_reset(s390_pci_reset_cb, pbdev); - } - } else { - pbdev->fh |= FH_SHM_EMUL; -@@ -1279,6 +1270,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, - pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); - } - -+void s390_pci_ism_reset(void) -+{ -+ S390pciState *s = s390_get_phb(); -+ -+ S390PCIBusDevice *pbdev, *next; -+ -+ /* Trigger reset event for each passthrough ISM device currently in-use */ -+ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { -+ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM && -+ pbdev->fh & FH_MASK_ENABLE) { -+ s390_pci_kvm_aif_disable(pbdev); -+ -+ pci_device_reset(pbdev->pdev); -+ } -+ } -+} -+ - static void s390_pcihost_reset(DeviceState *dev) - { - S390pciState *s = S390_PCI_HOST_BRIDGE(dev); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e26ce26f5a..24f4773179 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -118,6 +118,14 @@ static void subsystem_reset(void) - DeviceState *dev; - int i; - -+ /* -+ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can -+ * occur during reset of the vfio-pci device (unmap of entire aperture). -+ * Ensure any passthrough ISM devices are reset now, while CPUs are paused -+ * but before vfio-pci cleanup occurs. 
-+ */ -+ s390_pci_ism_reset(); -+ - for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) { - dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL)); - if (dev) { -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 435e788867..2c43ea123f 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s, - const char *target); - S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s, - S390PCIBusDevice *pbdev); -+void s390_pci_ism_reset(void); - - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch deleted file mode 100644 index 845a467..0000000 --- a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 49078bdfd4c116da3e920632ec6f7041f1b38015 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 097/101] s390x/pci: refresh fh before disabling aif -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [2/3] 3523067909c41818dfc769abdb93930833416c11 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - -commit 30e35258e25c75c9d799c34fd89afcafffb37084 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:50 2024 -0500 - - s390x/pci: refresh fh before disabling aif - - Typically we refresh the host fh during CLP enable, however it's possible - that the device goes through multiple reset events before the guest - performs another CLP enable. Let's handle this for now by refreshing the - host handle from vfio before disabling aif. 
- - Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") - Reported-by: Cédric Le Goater - Reviewed-by: Eric Farman - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com> - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-kvm.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c -index 1ee510436c..9eef4fc3ec 100644 ---- a/hw/s390x/s390-pci-kvm.c -+++ b/hw/s390x/s390-pci-kvm.c -@@ -18,6 +18,7 @@ - #include "hw/s390x/s390-pci-bus.h" - #include "hw/s390x/s390-pci-kvm.h" - #include "hw/s390x/s390-pci-inst.h" -+#include "hw/s390x/s390-pci-vfio.h" - #include "cpu_models.h" - - bool s390_pci_kvm_interp_allowed(void) -@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) - return -EINVAL; - } - -+ /* -+ * The device may have already been reset but we still want to relinquish -+ * the guest ISC, so always be sure to use an up-to-date host fh. 
-+ */ -+ if (!s390_pci_get_host_fh(pbdev, &args.fh)) { -+ return -EPERM; -+ } -+ - rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); - if (rc == 0) { - pbdev->aif = false; --- -2.39.3 - diff --git a/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch new file mode 100644 index 0000000..2c0fb91 --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch @@ -0,0 +1,186 @@ +From 3d197f42afea6d0b176c2b26b772965692ffeab3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 14 May 2024 18:42:44 +0100 +Subject: [PATCH 047/100] scripts/update-linux-header.sh: be more src tree + friendly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [47/91] c4165cc8bf79c3f96912e8210b3bb3565add288f (bonzini/rhel-qemu-kvm) + +Running "install_headers" in the Linux source tree is fairly +unfriendly as out-of-tree builds will start complaining about the +kernel source being non-pristine. As we have a temporary directory for +the install we should also do the build step here. So now we have: + + $tmpdir/ + $blddir/ + $hdrdir/ + +Reviewed-by: Pierrick Bouvier +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Alex Bennée +Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org> +(cherry picked from commit b51ddd937f11f76614d4b36d14d8778df242661c) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 80 +++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 39 deletions(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 5f20434d5c..4431ba4d54 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -27,6 +27,8 @@ + # types like "__u64". This work is done in the cp_portable function. + + tmpdir=$(mktemp -d) ++hdrdir="$tmpdir/headers" ++blddir="$tmpdir/build" + linux="$1" + output="$2" + +@@ -111,56 +113,56 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + +- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install ++ make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + done + + if [ $arch = mips ]; then +- cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + fi + if [ $arch = powerpc ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" +- cp 
"$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + fi + + rm -rf "$output/include/standard-headers/asm-$arch" + mkdir -p "$output/include/standard-headers/asm-$arch" + if [ $arch = s390 ]; then +- cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" ++ cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + fi + if [ $arch = arm ]; then +- cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi + if [ $arch = arm64 ]; then +- cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" ++ cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + fi + if [ $arch = x86 ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" +- cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_x32.h" 
"$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +- "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" +- cp_portable "$tmpdir/bootparam.h" \ ++ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" ++ cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" +- cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ cp_portable "$hdrdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then +- cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" ++ cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done + arch= +@@ -170,13 +172,13 @@ mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ + vduse.h iommufd.h bits.h; do +- cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" ++ cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" + done + + rm -rf "$output/linux-headers/asm-generic" + mkdir -p "$output/linux-headers/asm-generic" + for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do +- cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" ++ cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + done + + if [ -L "$linux/source" ]; then +@@ -211,23 +213,23 @@ EOF + + rm -rf "$output/include/standard-headers/linux" + mkdir -p "$output/include/standard-headers/linux" +-for i in "$tmpdir"/include/linux/*virtio*.h \ +- "$tmpdir/include/linux/qemu_fw_cfg.h" \ +- "$tmpdir/include/linux/fuse.h" \ +- 
"$tmpdir/include/linux/input.h" \ +- "$tmpdir/include/linux/input-event-codes.h" \ +- "$tmpdir/include/linux/udmabuf.h" \ +- "$tmpdir/include/linux/pci_regs.h" \ +- "$tmpdir/include/linux/ethtool.h" \ +- "$tmpdir/include/linux/const.h" \ +- "$tmpdir/include/linux/kernel.h" \ +- "$tmpdir/include/linux/vhost_types.h" \ +- "$tmpdir/include/linux/sysinfo.h" \ +- "$tmpdir/include/misc/pvpanic.h"; do ++for i in "$hdrdir"/include/linux/*virtio*.h \ ++ "$hdrdir/include/linux/qemu_fw_cfg.h" \ ++ "$hdrdir/include/linux/fuse.h" \ ++ "$hdrdir/include/linux/input.h" \ ++ "$hdrdir/include/linux/input-event-codes.h" \ ++ "$hdrdir/include/linux/udmabuf.h" \ ++ "$hdrdir/include/linux/pci_regs.h" \ ++ "$hdrdir/include/linux/ethtool.h" \ ++ "$hdrdir/include/linux/const.h" \ ++ "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/vhost_types.h" \ ++ "$hdrdir/include/linux/sysinfo.h" \ ++ "$hdrdir/include/misc/pvpanic.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done + mkdir -p "$output/include/standard-headers/drm" +-cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ ++cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" + + rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch new file mode 100644 index 0000000..eea324e --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch @@ -0,0 +1,38 @@ +From 5db9faee4d6efc9dbe010d2b745aba59d943d2ac Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Wed, 21 Feb 2024 10:51:38 -0600 +Subject: [PATCH 016/100] scripts/update-linux-headers: Add bits.h to file + imports + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [16/91] 
150ee6376982bd5f471cb561f6760bf80d1211db (bonzini/rhel-qemu-kvm) + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b40b8eb609d3549ac14aab43849b20f5cba951c9) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index d48856f9e2..5f20434d5c 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -169,7 +169,7 @@ rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ +- vduse.h iommufd.h; do ++ vduse.h iommufd.h bits.h; do + cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + done + +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch new file mode 100644 index 0000000..86c1f9c --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch @@ -0,0 +1,83 @@ +From 2d0989fe09703ef46ba9c5d14770dbf8a6fd2f80 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Sun, 18 Feb 2024 23:35:02 -0600 +Subject: [PATCH 015/100] scripts/update-linux-headers: Add setup_data.h to + import list + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [15/91] 9d46c8787259317710a84e7a6aa36731e9f55a17 (bonzini/rhel-qemu-kvm) + +Data structures like struct setup_data have been moved to a separate +setup_data.h header which bootparam.h relies on. Add setup_data.h to +the cp_portable() list and sync it along with the other header files. 
+ +Note that currently struct setup_data is stripped away as part of +generating bootparam.h, but that handling is not currently needed for +setup_data.h since it doesn't pull in many external +headers/dependencies. However, QEMU currently redefines struct +setup_data in hw/i386/x86.c, so that will need to be removed as part of +any header update that pulls in the new setup_data.h to avoid build +bisect breakage. + +Because <asm/setup_data.h> is the first architecture specific #include +in include/standard-headers/, add a new sed substitution to rewrite +asm/ include to the standard-headers/asm-* subdirectory for the current +architecture. + +And while at it, remove asm-generic/kvm_para.h from the list of +allowed includes: it does not have a matching substitution, and therefore +it would not be possible to use it on non-Linux systems where there is +no /usr/include/asm-generic/ directory. + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index a0006eec6f..d48856f9e2 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -61,7 +61,7 @@ cp_portable() { + -e 'linux/const' \ + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ +- -e 'asm-generic/kvm_para' \ ++ -e 'asm/setup_data.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f".
+@@ -77,6 +77,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \ ++ -e 's/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -155,11 +156,14 @@ for arch in $ARCHLIST; do + "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" + cp_portable "$tmpdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" ++ cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done ++arch= + + rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch new file mode 100644 index 0000000..ecd631a --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch @@ -0,0 +1,47 @@ +From 09acdbc49a4dd85d82ad30ec2859edfcdba8431e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:01:26 +0200 +Subject: [PATCH 049/100] scripts/update-linux-headers.sh: Fix the path of + setup_data.h + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [49/91] f3008bc07796687c9440f5720fbc72a12d0a1278 (bonzini/rhel-qemu-kvm) + +When running the update-linux-headers.sh script, it currently fails with: + +scripts/update-linux-headers.sh: line 73: .../qemu/standard-headers/asm-x86/setup_data.h: No such file or directory + +The "include" folder is obviously missing here - no clue how this could +have worked before?
+ +Fixes: 66210a1a30 ("scripts/update-linux-headers: Add setup_data.h to import list") +Message-ID: <20240527060126.12578-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit bde26d90ae9f7551cac90e117fc7216c807a3bfe) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 0f404d5317..f084bee72e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -160,7 +160,7 @@ for arch in $ARCHLIST; do + cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" + cp_portable "$hdrdir/include/asm/setup_data.h" \ +- "$output/standard-headers/asm-x86" ++ "$output/include/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch new file mode 100644 index 0000000..c7dbd3a --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch @@ -0,0 +1,44 @@ +From 8e63d742015bf69a00fd44e88eb1198f594b2de2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:02:43 +0200 +Subject: [PATCH 048/100] scripts/update-linux-headers.sh: Remove temporary + directory inbetween + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [48/91] 879554dc7e722c4e20b302a00ca745ddeefdc0fb (bonzini/rhel-qemu-kvm) + +We are reusing the same temporary directory for installing the headers +of all targets, so there could be stale files here when switching from +one target to another. 
Make sure to delete the folder before installing +a new set of target headers into it. + +Message-ID: <20240527060243.12647-1-thuth@redhat.com> +Reviewed-by: Michael S. Tsirkin +Acked-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit 3efc75ad9d9317e5709861bbebb2c29390f8e7a2) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 4431ba4d54..0f404d5317 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -113,6 +113,7 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + ++ rm -rf "$hdrdir" + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-Await-request-purging.patch b/SOURCES/kvm-scsi-Await-request-purging.patch deleted file mode 100644 index 9bd4399..0000000 --- a/SOURCES/kvm-scsi-Await-request-purging.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 94d6458a58239b52394d58b6880509041186d5a8 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 15:47:55 +0100 -Subject: [PATCH 04/22] scsi: Await request purging - -RH-Author: Hanna Czenczek -RH-MergeRequest: 222: Allow concurrent BlockBackend context changes -RH-Jira: RHEL-24593 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [2/2] 35a89273cab0af8f999881e67d359fe1328363a0 (hreitz/qemu-kvm-c-9-s) - -scsi_device_for_each_req_async() currently does not provide any way to -be awaited. One of its callers is scsi_device_purge_requests(), which -therefore currently does not guarantee that all requests are fully -settled when it returns. 
- -We want all requests to be settled, because scsi_device_purge_requests() -is called through the unrealize path, including the one invoked by -virtio_scsi_hotunplug() through qdev_simple_device_unplug_cb(), which -most likely assumes that all SCSI requests are done then. - -In fact, scsi_device_purge_requests() already contains a blk_drain(), -but this will not fully await scsi_device_for_each_req_async(), only the -I/O requests it potentially cancels (not the non-I/O requests). -However, we can have scsi_device_for_each_req_async() increment the BB -in-flight counter, and have scsi_device_for_each_req_async_bh() -decrement it when it is done. This way, the blk_drain() will fully -await all SCSI requests to be purged. - -This also removes the need for scsi_device_for_each_req_async_bh() to -double-check the current context and potentially re-schedule itself, -should it now differ from the BB's context: Changing a BB's AioContext -with a root node is done through bdrv_try_change_aio_context(), which -creates a drained section. With this patch, we keep the BB in-flight -counter elevated throughout, so we know the BB's context cannot change. - -Signed-off-by: Hanna Czenczek -Message-ID: <20240202144755.671354-3-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 1604c0493193273e4eac547f86fbd2845e7f9af4) ---- - hw/scsi/scsi-bus.c | 30 +++++++++++++++++++++--------- - 1 file changed, 21 insertions(+), 9 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 5b08cbf60a..b1bf8e6433 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque) - SCSIRequest *next; - - /* -- * If the AioContext changed before this BH was called then reschedule into -- * the new AioContext before accessing ->requests. 
This can happen when -- * scsi_device_for_each_req_async() is called and then the AioContext is -- * changed before BHs are run. -+ * The BB cannot have changed contexts between this BH being scheduled and -+ * now: BBs' AioContexts, when they have a node attached, can only be -+ * changed via bdrv_try_change_aio_context(), in a drained section. While -+ * we have the in-flight counter incremented, that drain must block. - */ - ctx = blk_get_aio_context(s->conf.blk); -- if (ctx != qemu_get_current_aio_context()) { -- aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, -- g_steal_pointer(&data)); -- return; -- } -+ assert(ctx == qemu_get_current_aio_context()); - - QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { - data->fn(req, data->fn_opaque); -@@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque) - - /* Drop the reference taken by scsi_device_for_each_req_async() */ - object_unref(OBJECT(s)); -+ -+ /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ -+ blk_dec_in_flight(s->conf.blk); - } - - /* - * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() - * runs in the AioContext that is executing the request. -+ * Keeps the BlockBackend's in-flight counter incremented until everything is -+ * done, so draining it will settle all scheduled @fn() calls. - */ - static void scsi_device_for_each_req_async(SCSIDevice *s, - void (*fn)(SCSIRequest *, void *), -@@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, - */ - object_ref(OBJECT(s)); - -+ /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ -+ blk_inc_in_flight(s->conf.blk); - aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), - scsi_device_for_each_req_async_bh, - data); -@@ -1728,11 +1731,20 @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) - scsi_req_cancel_async(req, NULL); - } - -+/** -+ * Cancel all requests, and block until they are deleted. 
-+ */ - void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { - scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - -+ /* -+ * Await all the scsi_device_purge_one_req() calls scheduled by -+ * scsi_device_for_each_req_async(), and all I/O requests that were -+ * cancelled this way, but may still take a bit of time to settle. -+ */ - blk_drain(sdev->conf.blk); -+ - scsi_device_set_ua(sdev, sense); - } - --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch deleted file mode 100644 index 6d43810..0000000 --- a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch +++ /dev/null @@ -1,88 +0,0 @@ -From cd08d22a0da022d99fe6cfddb7de680abf66c8be Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:19:59 -0500 -Subject: [PATCH 082/101] scsi: assert that callbacks run in the correct - AioContext - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [13/26] d2fd5065c3b72d9d2f4e37efee39fe12eba0f0a9 (kmwolf/centos-qemu-kvm) - -Since the removal of AioContext locking, the correctness of the code -relies on running requests from a single AioContext at any given time. - -Add assertions that verify that callbacks are invoked in the correct -AioContext. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231205182011.1976568-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 14 ++++++++++++++ - system/dma-helpers.c | 3 +++ - 2 files changed, 17 insertions(+) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 2c1bbb3530..a5048e0aaf 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret) - - static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) - { -+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - uint32_t n; - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert(r->req.aiocb == NULL); - if (scsi_disk_req_check_error(r, ret, false)) { - goto done; -@@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req) - - static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) - { -+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - uint32_t n; - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert (r->req.aiocb == NULL); - if (scsi_disk_req_check_error(r, ret, false)) { - goto done; -diff --git a/system/dma-helpers.c b/system/dma-helpers.c -index 528117f256..9b221cf94e 100644 ---- a/system/dma-helpers.c -+++ b/system/dma-helpers.c -@@ -119,6 +119,9 @@ static void dma_blk_cb(void *opaque, int ret) - - 
trace_dma_blk_cb(dbs, ret); - -+ /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */ -+ assert(ctx == qemu_get_current_aio_context()); -+ - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch new file mode 100644 index 0000000..a7eebbc --- /dev/null +++ b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch @@ -0,0 +1,60 @@ +From d580b83d9eda7802ffa3890ea8641793fe78937c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:05 +0200 +Subject: [PATCH 094/100] scsi-block: Don't skip callback for sgio error + status/driver_status + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 1fee1b21dae314f4f34c88f2d2fabd7af011404a (kmwolf/centos-qemu-kvm) + +Instead of calling into scsi_handle_rw_error() directly from +scsi_block_sgio_complete() and skipping the normal callback, go through +the normal cleanup path by calling the callback with a positive error +value. + +The important difference here is not only that the code path is cleaner, +but that the callbacks set r->req.aiocb = NULL. If we skip setting this +and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs +into an assertion failure in scsi_read_data() or scsi_write_data() +because the dangling aiocb pointer is unexpected. 
+ +Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.") +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 622a70161ac258e4a166a7dca4b5be267e0652d9) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index bed2c8746c..e7f57f3230 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2804,16 +2804,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } else { + ret = io_hdr->status; + } +- +- if (ret > 0) { +- if (scsi_handle_rw_error(r, ret, true)) { +- scsi_req_unref(&r->req); +- return; +- } +- +- /* Ignore error. */ +- ret = 0; +- } + } + + req->cb(req->cb_opaque, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch new file mode 100644 index 0000000..20aa88a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch @@ -0,0 +1,79 @@ +From eebe5fe8cbc854a6365e7c1adbb701079b137bcb Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:06 +0200 +Subject: [PATCH 095/100] scsi-disk: Add warning comments that host_status + errors take a shortcut + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 6fcd603fc78fda65a425a1acd9a8710d81c6ed7f (kmwolf/centos-qemu-kvm) + +scsi_block_sgio_complete() has surprising behaviour in that there are +error cases in which it directly completes the request and never calls +the passed callback. 
In the current state of the code, this doesn't seem +to result in bugs, but with future code changes, we must be careful to +never rely on the callback doing some cleanup until this code smell is +fixed. For now, just add warnings to make people aware of the trap. + +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8a0495624f23f8f01dfb1484f367174f3b3572e8) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e7f57f3230..b4062ac2ff 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -65,6 +65,9 @@ struct SCSIDiskClass { + /* + * Callbacks receive ret == 0 for success. Errors are represented either as + * negative errno values, or as positive SAM status codes. ++ * ++ * Beware: For errors returned in host_status, the function may directly ++ * complete the request and never call the callback. 
+ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; +@@ -359,6 +362,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -399,6 +403,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_read_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -538,6 +543,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_write_complete(void * opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -2793,6 +2799,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { ++ /* FIXME This skips calling req->cb() and any cleanup in it */ + if (io_hdr->host_status != SCSI_HOST_OK) { + scsi_req_complete_failed(&r->req, io_hdr->host_status); + scsi_req_unref(&r->req); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch new file mode 100644 index 0000000..0e2aeaf --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch @@ -0,0 +1,106 @@ +From bd5cace452183053e356a27317c759ecfe0391aa Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:07 +0200 +Subject: [PATCH 096/100] scsi-disk: Always report RESERVATION_CONFLICT to + guest + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] eb4142071e5cbe385a949a6c48b0c8f8c6086918 (kmwolf/centos-qemu-kvm) + +In the case of scsi-block, RESERVATION_CONFLICT is not a backend error, +but indicates that the 
guest tried to make a request that it isn't +allowed to execute. Pass the error to the guest so that it can decide +what to do with it. + +Without this, if we stop the VM in response to a RESERVATION_CONFLICT +(as is the default policy in management software such as oVirt or +KubeVirt), it can happen that the VM cannot be resumed any more because +every attempt to resume it immediately runs into the same error and +stops the VM again. + +One case that expects RESERVATION_CONFLICT errors to be visible in the +guest is running the validation tests in Windows 2019's Failover Cluster +Manager, which intentionally tries to execute invalid requests to see if +they are properly rejected. + +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9da6bd39f92434f55573acd017841b195c60188f) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index b4062ac2ff..91ccf37fef 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -202,7 +202,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + SCSISense sense = SENSE_CODE(NO_SENSE); +- int error = 0; ++ int error; + bool req_has_sense = false; + BlockErrorAction action; + int status; +@@ -213,11 +213,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } else { + /* A passthrough command has completed with nonzero status. 
*/ + status = ret; +- if (status == CHECK_CONDITION) { ++ switch (status) { ++ case CHECK_CONDITION: + req_has_sense = true; + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- } else { ++ break; ++ case RESERVATION_CONFLICT: ++ /* ++ * Don't apply the error policy, always report to the guest. ++ * ++ * This is a passthrough code path, so it's not a backend error, but ++ * a response to an invalid guest request. ++ * ++ * Windows Failover Cluster validation intentionally sends invalid ++ * requests to verify that reservations work as intended. It is ++ * crucial that it sees the resulting errors. ++ * ++ * Treating a reservation conflict as a guest-side error is obvious ++ * when a pr-manager is in use. Without one, the situation is less ++ * clear, but there might be nothing that can be fixed on the host ++ * (like in the above example), and we don't want to be stuck in a ++ * loop where resuming the VM and retrying the request immediately ++ * stops it again. So always reporting is still the safer option in ++ * this case, too. ++ */ ++ error = 0; ++ break; ++ default: + error = EINVAL; ++ break; + } + } + +@@ -227,8 +251,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + * are usually retried immediately, so do not post them to QMP and + * do not account them as failed I/O. 
+ */ +- if (req_has_sense && +- scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ if (!error || (req_has_sense && ++ scsi_sense_buf_is_guest_recoverable(r->req.sense, ++ sizeof(r->req.sense)))) { + action = BLOCK_ERROR_ACTION_REPORT; + acct_failed = false; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch new file mode 100644 index 0000000..409028a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch @@ -0,0 +1,125 @@ +From 1a0aa9bbdad63d72628002740410b8a28282a96e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:04 +0200 +Subject: [PATCH 093/100] scsi-disk: Use positive return value for status in + dma_readv/writev + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] a0b3e7bfd7b7059c0ec3706f2eb1698c1d430b08 (kmwolf/centos-qemu-kvm) + +In some error cases, scsi_block_sgio_complete() never calls the passed +callback, but directly completes the request. This leads to bugs because +its error paths are not exact copies of what the callback would normally +do. + +In preparation to fix this, allow passing positive return values to the +callbacks that represent the status code that should be used to complete +the request. + +scsi_handle_rw_error() already handles positive values for its ret +parameter because scsi_block_sgio_complete() calls directly into it. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit cfe0880835cd364b590ffd27ef8dbd2ad8838bc5) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 4bd7af9d0c..bed2c8746c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -62,6 +62,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE) + + struct SCSIDiskClass { + SCSIDeviceClass parent_class; ++ /* ++ * Callbacks receive ret == 0 for success. Errors are represented either as ++ * negative errno values, or as positive SAM status codes. ++ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; + bool (*need_fua_emulation)(SCSICommand *cmd); +@@ -261,7 +265,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return true; + } + +- if (ret < 0) { ++ if (ret != 0) { + return scsi_handle_rw_error(r, ret, acct_failed); + } + +@@ -338,7 +342,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) + static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) + { + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -363,9 +367,10 @@ static void scsi_dma_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +@@ -381,7 +386,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, 
false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -402,9 +407,10 @@ static void scsi_read_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } +@@ -512,7 +518,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -540,9 +546,10 @@ static void scsi_write_complete(void * opaque, int ret) + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch deleted file mode 100644 index 65b08ce..0000000 --- a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch +++ /dev/null @@ -1,245 +0,0 @@ -From d1d384bd24a7aeb527f4abd8a0958146544ef9bb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:58 -0500 -Subject: [PATCH 079/101] scsi: don't lock AioContext in I/O code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [10/26] b5814cec94af5c254e300646d8783672b085bac3 (kmwolf/centos-qemu-kvm) - -blk_aio_*() doesn't require 
the AioContext lock and the SCSI subsystem's -internal state also does not anymore. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Acked-by: Kevin Wolf -Message-ID: <20231204164259.1515217-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 23 ----------------------- - hw/scsi/scsi-generic.c | 20 +++----------------- - 2 files changed, 3 insertions(+), 40 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 6691f5edb8..2c1bbb3530 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret) - scsi_req_complete(&r->req, GOOD); - - done: -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - scsi_req_unref(&r->req); - } - -@@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret) - trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); - } - scsi_read_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - /* Actually issue a read to the block device. 
*/ -@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_do_read(opaque, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - /* Read more data from scsi device into buffer. */ -@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_write_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_write_data(SCSIRequest *req) -@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - scsi_unmap_complete_noio(data, ret); - } -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) -@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - 
-- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret) - data->sector << BDRV_SECTOR_BITS, - &data->qiov, 0, - scsi_write_same_complete, data); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - return; - } - -@@ -1857,7 +1839,6 @@ done: - scsi_req_unref(&r->req); - qemu_vfree(data->iov.iov_base); - g_free(data); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) -@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) - { - SCSIBlockReq *req = (SCSIBlockReq *)opaque; - SCSIDiskReq *r = &req->req; -- SCSIDevice *s = r->req.dev; - sg_io_hdr_t *io_hdr = &req->io_header; - - if (ret == 0) { -@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret) - } - - if (ret > 0) { -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - if (scsi_handle_rw_error(r, ret, true)) { -- aio_context_release(blk_get_aio_context(s->conf.blk)); - scsi_req_unref(&r->req); - return; - } -- aio_context_release(blk_get_aio_context(s->conf.blk)); - - /* Ignore error. 
*/ - ret = 0; -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 2417f0ad84..b7b04e1d63 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -109,15 +109,11 @@ done: - static void scsi_command_complete(void *opaque, int ret) - { - SCSIGenericReq *r = (SCSIGenericReq *)opaque; -- SCSIDevice *s = r->req.dev; -- -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - scsi_command_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - static int execute_command(BlockBackend *blk, -@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret) - SCSIDevice *s = r->req.dev; - int len; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); -- goto done; -+ return; - } - - len = r->io_header.dxfer_len - r->io_header.resid; -@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret) - r->io_header.status != GOOD || - len == 0) { - scsi_command_complete_noio(r, 0); -- goto done; -+ return; - } - - /* Snoop READ CAPACITY output to set the blocksize. */ -@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret) - req_complete: - scsi_req_data(&r->req, len); - scsi_req_unref(&r->req); -- --done: -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - /* Read more data from scsi device into buffer. 
*/ -@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret) - - trace_scsi_generic_write_complete(ret); - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); -- goto done; -+ return; - } - - if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && -@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret) - } - - scsi_command_complete_noio(r, ret); -- --done: -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - /* Write data to a scsi device. Returns nonzero on failure. --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch deleted file mode 100644 index 30f1c00..0000000 --- a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch +++ /dev/null @@ -1,307 +0,0 @@ -From 42dd1357310bd1a68d6cacaa53cd5b1d1b02880d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:56 -0500 -Subject: [PATCH 077/101] scsi: only access SCSIDevice->requests from one - thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [8/26] 9df662e82a63e93d184b5763bebbe7e43bc9dabe (kmwolf/centos-qemu-kvm) - -Stop depending on the AioContext lock and instead access -SCSIDevice->requests from only one thread at a time: -- When the VM is running only the BlockBackend's AioContext may access - the requests list. -- When the VM is stopped only the main loop may access the requests - list. - -These constraints protect the requests list without the need for locking -in the I/O code path. - -Note that multiple IOThreads are not supported yet because the code -assumes all SCSIRequests are executed from a single AioContext. 
Leave -that as future work. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231204164259.1515217-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++------------- - include/hw/scsi/scsi.h | 7 +- - 2 files changed, 131 insertions(+), 57 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index fc4b77fdb0..b649cdf555 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun) - return d; - } - -+/* -+ * Invoke @fn() for each enqueued request in device @s. Must be called from the -+ * main loop thread while the guest is stopped. This is only suitable for -+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases. -+ */ -+static void scsi_device_for_each_req_sync(SCSIDevice *s, -+ void (*fn)(SCSIRequest *, void *), -+ void *opaque) -+{ -+ SCSIRequest *req; -+ SCSIRequest *next_req; -+ -+ assert(!runstate_is_running()); -+ assert(qemu_in_main_thread()); -+ -+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) { -+ fn(req, opaque); -+ } -+} -+ -+typedef struct { -+ SCSIDevice *s; -+ void (*fn)(SCSIRequest *, void *); -+ void *fn_opaque; -+} SCSIDeviceForEachReqAsyncData; -+ -+static void scsi_device_for_each_req_async_bh(void *opaque) -+{ -+ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; -+ SCSIDevice *s = data->s; -+ AioContext *ctx; -+ SCSIRequest *req; -+ SCSIRequest *next; -+ -+ /* -+ * If the AioContext changed before this BH was called then reschedule into -+ * the new AioContext before accessing ->requests. This can happen when -+ * scsi_device_for_each_req_async() is called and then the AioContext is -+ * changed before BHs are run. 
-+ */ -+ ctx = blk_get_aio_context(s->conf.blk); -+ if (ctx != qemu_get_current_aio_context()) { -+ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, -+ g_steal_pointer(&data)); -+ return; -+ } -+ -+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { -+ data->fn(req, data->fn_opaque); -+ } -+ -+ /* Drop the reference taken by scsi_device_for_each_req_async() */ -+ object_unref(OBJECT(s)); -+} -+ -+/* -+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() -+ * runs in the AioContext that is executing the request. -+ */ -+static void scsi_device_for_each_req_async(SCSIDevice *s, -+ void (*fn)(SCSIRequest *, void *), -+ void *opaque) -+{ -+ assert(qemu_in_main_thread()); -+ -+ SCSIDeviceForEachReqAsyncData *data = -+ g_new(SCSIDeviceForEachReqAsyncData, 1); -+ -+ data->s = s; -+ data->fn = fn; -+ data->fn_opaque = opaque; -+ -+ /* -+ * Hold a reference to the SCSIDevice until -+ * scsi_device_for_each_req_async_bh() finishes. -+ */ -+ object_ref(OBJECT(s)); -+ -+ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), -+ scsi_device_for_each_req_async_bh, -+ data); -+} -+ - static void scsi_device_realize(SCSIDevice *s, Error **errp) - { - SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); -@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, - qbus_set_bus_hotplug_handler(BUS(bus)); - } - --static void scsi_dma_restart_bh(void *opaque) -+void scsi_req_retry(SCSIRequest *req) - { -- SCSIDevice *s = opaque; -- SCSIRequest *req, *next; -- -- qemu_bh_delete(s->bh); -- s->bh = NULL; -+ req->retry = true; -+} - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { -- scsi_req_ref(req); -- if (req->retry) { -- req->retry = false; -- switch (req->cmd.mode) { -+/* Called in the AioContext that is executing the request */ -+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque) -+{ -+ scsi_req_ref(req); -+ if (req->retry) { 
-+ req->retry = false; -+ switch (req->cmd.mode) { - case SCSI_XFER_FROM_DEV: - case SCSI_XFER_TO_DEV: - scsi_req_continue(req); -@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque) - scsi_req_dequeue(req); - scsi_req_enqueue(req); - break; -- } - } -- scsi_req_unref(req); - } -- aio_context_release(blk_get_aio_context(s->conf.blk)); -- /* Drop the reference that was acquired in scsi_dma_restart_cb */ -- object_unref(OBJECT(s)); --} -- --void scsi_req_retry(SCSIRequest *req) --{ -- /* No need to save a reference, because scsi_dma_restart_bh just -- * looks at the request list. */ -- req->retry = true; -+ scsi_req_unref(req); - } - - static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) - { - SCSIDevice *s = opaque; - -+ assert(qemu_in_main_thread()); -+ - if (!running) { - return; - } -- if (!s->bh) { -- AioContext *ctx = blk_get_aio_context(s->conf.blk); -- /* The reference is dropped in scsi_dma_restart_bh.*/ -- object_ref(OBJECT(s)); -- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, -- &DEVICE(s)->mem_reentrancy_guard); -- qemu_bh_schedule(s->bh); -- } -+ -+ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL); - } - - static bool scsi_bus_is_address_free(SCSIBus *bus, -@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) - } - } - -+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) -+{ -+ scsi_req_cancel_async(req, NULL); -+} -+ - void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { -- SCSIRequest *req; -+ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - - aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); -- while (!QTAILQ_EMPTY(&sdev->requests)) { -- req = QTAILQ_FIRST(&sdev->requests); -- scsi_req_cancel_async(req, NULL); -- } - blk_drain(sdev->conf.blk); - aio_context_release(blk_get_aio_context(sdev->conf.blk)); - scsi_device_set_ua(sdev, sense); -@@ -1737,31 +1804,33 @@ static char 
*scsibus_get_fw_dev_path(DeviceState *dev) - - /* SCSI request list. For simplicity, pv points to the whole device */ - -+static void put_scsi_req(SCSIRequest *req, void *opaque) -+{ -+ QEMUFile *f = opaque; -+ -+ assert(!req->io_canceled); -+ assert(req->status == -1 && req->host_status == -1); -+ assert(req->enqueued); -+ -+ qemu_put_sbyte(f, req->retry ? 1 : 2); -+ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); -+ qemu_put_be32s(f, &req->tag); -+ qemu_put_be32s(f, &req->lun); -+ if (req->bus->info->save_request) { -+ req->bus->info->save_request(f, req); -+ } -+ if (req->ops->save_request) { -+ req->ops->save_request(f, req); -+ } -+} -+ - static int put_scsi_requests(QEMUFile *f, void *pv, size_t size, - const VMStateField *field, JSONWriter *vmdesc) - { - SCSIDevice *s = pv; -- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); -- SCSIRequest *req; - -- QTAILQ_FOREACH(req, &s->requests, next) { -- assert(!req->io_canceled); -- assert(req->status == -1 && req->host_status == -1); -- assert(req->enqueued); -- -- qemu_put_sbyte(f, req->retry ? 
1 : 2); -- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); -- qemu_put_be32s(f, &req->tag); -- qemu_put_be32s(f, &req->lun); -- if (bus->info->save_request) { -- bus->info->save_request(f, req); -- } -- if (req->ops->save_request) { -- req->ops->save_request(f, req); -- } -- } -+ scsi_device_for_each_req_sync(s, put_scsi_req, f); - qemu_put_sbyte(f, 0); -- - return 0; - } - -diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 3692ca82f3..10c4e8288d 100644 ---- a/include/hw/scsi/scsi.h -+++ b/include/hw/scsi/scsi.h -@@ -69,14 +69,19 @@ struct SCSIDevice - { - DeviceState qdev; - VMChangeStateEntry *vmsentry; -- QEMUBH *bh; - uint32_t id; - BlockConf conf; - SCSISense unit_attention; - bool sense_is_ua; - uint8_t sense[SCSI_SENSE_BUF_SIZE]; - uint32_t sense_len; -+ -+ /* -+ * The requests list is only accessed from the AioContext that executes -+ * requests or from the main loop when IOThread processing is stopped. -+ */ - QTAILQ_HEAD(, SCSIRequest) requests; -+ - uint32_t channel; - uint32_t lun; - int blocksize; --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-remove-AioContext-locking.patch b/SOURCES/kvm-scsi-remove-AioContext-locking.patch deleted file mode 100644 index 34a5e46..0000000 --- a/SOURCES/kvm-scsi-remove-AioContext-locking.patch +++ /dev/null @@ -1,280 +0,0 @@ -From 61d605433a5edfcc7fe836fd399106ed1e1907bb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:05 -0500 -Subject: [PATCH 088/101] scsi: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [19/26] 12a8e26670074a17dd2b0cfac06e0aea03b3068f (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer has any effect. Remove it. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-9-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-bus.c | 2 -- - hw/scsi/scsi-disk.c | 31 +++++-------------------------- - hw/scsi/virtio-scsi.c | 18 ------------------ - include/hw/virtio/virtio-scsi.h | 14 -------------- - 4 files changed, 5 insertions(+), 60 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index b649cdf555..5b08cbf60a 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { - scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - -- aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); - blk_drain(sdev->conf.blk); -- aio_context_release(blk_get_aio_context(sdev->conf.blk)); - scsi_device_set_ua(sdev, sense); - } - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index a5048e0aaf..61be3d395a 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); - uint64_t nb_sectors; -- AioContext *ctx; - - scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); - -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); - blk_get_geometry(s->qdev.conf.blk, &nb_sectors); -- aio_context_release(ctx); - - nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE; - if (nb_sectors) { -@@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev) - static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx = NULL; -+ - /* can happen for devices without drive. 
The error message for missing - * backend will be issued in scsi_realize - */ - if (s->qdev.conf.blk) { -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); - if (!blkconf_blocksizes(&s->qdev.conf, errp)) { -- goto out; -+ return; - } - } - s->qdev.blocksize = s->qdev.conf.logical_block_size; -@@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU HARDDISK"); - } - scsi_realize(&s->qdev, errp); --out: -- if (ctx) { -- aio_context_release(ctx); -- } - } - - static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx; - int ret; - uint32_t blocksize = 2048; - -@@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - blocksize = dev->conf.physical_block_size; - } - -- ctx = blk_get_aio_context(dev->conf.blk); -- aio_context_acquire(ctx); - s->qdev.blocksize = blocksize; - s->qdev.type = TYPE_ROM; - s->features |= 1 << SCSI_DISK_F_REMOVABLE; -@@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU CD-ROM"); - } - scsi_realize(&s->qdev, errp); -- aio_context_release(ctx); - } - - -@@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s) - static void scsi_block_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx; - int sg_version; - int rc; - -@@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - "be removed in a future version"); - } - -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); -- - /* check we are using a driver managing SG_IO (version 3 and after) */ - rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); - if (rc < 0) { -@@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - if (rc != -EPERM) { - error_append_hint(errp, "Is this a SCSI 
device?\n"); - } -- goto out; -+ return; - } - if (sg_version < 30000) { - error_setg(errp, "scsi generic interface too old"); -- goto out; -+ return; - } - - /* get device type from INQUIRY data */ - rc = get_device_type(s); - if (rc < 0) { - error_setg(errp, "INQUIRY failed"); -- goto out; -+ return; - } - - /* Make a guess for the block size, we'll fix it when the guest sends. -@@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - - scsi_realize(&s->qdev, errp); - scsi_generic_read_device_inquiry(&s->qdev); -- --out: -- aio_context_release(ctx); - } - - typedef struct SCSIBlockReq { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 4f8d35facc..ca365a70e9 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_ctrl_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) -@@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_cmd_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_get_config(VirtIODevice *vdev, -@@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_event_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) -@@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) - }, - }; - -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); -- virtio_scsi_release(s); - } - } - -@@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); - VirtIOSCSI *s = 
VIRTIO_SCSI(vdev); - SCSIDevice *sd = SCSI_DEVICE(dev); -- AioContext *old_context; - int ret; - - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; - } -- old_context = blk_get_aio_context(sd->conf.blk); -- aio_context_acquire(old_context); - ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); -- aio_context_release(old_context); - if (ret < 0) { - return; - } -@@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - }, - }; - -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); - scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); -- virtio_scsi_release(s); - } - } - -@@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, - qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); - - if (s->ctx) { -- virtio_scsi_acquire(s); - /* If other users keep the BlockBackend in the iothread, that's ok */ - blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); -- virtio_scsi_release(s); - } - - if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); - scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); -- virtio_scsi_release(s); - } - } - -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index da8cb928d9..7f0573b1bf 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -101,20 +101,6 @@ struct VirtIOSCSI { - uint32_t host_features; - }; - --static inline void virtio_scsi_acquire(VirtIOSCSI *s) --{ -- if (s->ctx) { -- aio_context_acquire(s->ctx); -- } --} -- --static inline void virtio_scsi_release(VirtIOSCSI *s) --{ -- if (s->ctx) { -- aio_context_release(s->ctx); -- } --} -- - void virtio_scsi_common_realize(DeviceState *dev, - VirtIOHandleOutput ctrl, - VirtIOHandleOutput evt, --- -2.39.3 - diff --git 
a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch deleted file mode 100644 index c9baf60..0000000 --- a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 9f5c6dbe907fe6227006ab51179eaa50a63559cb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:09 -0500 -Subject: [PATCH 092/101] scsi: remove outdated AioContext lock comment - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [23/26] 96e2e7d2e6a160ce4d695060f902d21030b3b1d8 (kmwolf/centos-qemu-kvm) - -The SCSI subsystem no longer uses the AioContext lock. Request -processing runs exclusively in the BlockBackend's AioContext since -"scsi: only access SCSIDevice->requests from one thread" and hence the -lock is unnecessary. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-13-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 61be3d395a..2e7e1e9a1c 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -355,7 +355,6 @@ done: - scsi_req_unref(&r->req); - } - --/* Called with AioContext lock held */ - static void scsi_dma_complete(void *opaque, int ret) - { - SCSIDiskReq *r = (SCSIDiskReq *)opaque; --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch b/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch deleted file mode 100644 index a9d24a0..0000000 --- a/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 93cf5b82771f1d1e8182be168dae7a45d42069e9 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 4 Mar 2024 15:39:57 +0100 -Subject: [PATCH 11/20] smbios: 
add smbios_add_usr_blob_size() helper - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [9/18] 8c698fb9e186d2b1d2b7f75a74305f356450ad68 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -it will be used by follow up patch when legacy handling -is moved out into a separate file. - -Signed-off-by: Igor Mammedov -Reviewed-by: Ani Sinha ---- - hw/smbios/smbios.c | 18 ++++++++++++++---- - 1 file changed, 14 insertions(+), 4 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 441517cf24..c48a290478 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -1426,6 +1426,14 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, - return true; - } - -+static void smbios_add_usr_blob_size(size_t size) -+{ -+ if (!usr_blobs_sizes) { -+ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); -+ } -+ g_array_append_val(usr_blobs_sizes, size); -+} -+ - void smbios_entry_add(QemuOpts *opts, Error **errp) - { - const char *val; -@@ -1473,10 +1481,12 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - smbios_type4_count++; - } - -- if (!usr_blobs_sizes) { -- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); -- } -- g_array_append_val(usr_blobs_sizes, size); -+ /* -+ * preserve blob size for legacy mode so it could build its -+ * blobs flavor from 'usr_blobs' -+ */ -+ smbios_add_usr_blob_size(size); -+ - usr_blobs_len += size; - if (size > usr_table_max) { - usr_table_max = size; --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch b/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch deleted file mode 100644 index b8e4d92..0000000 --- a/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch +++ /dev/null @@ -1,309 +0,0 @@ -From 15d293b706ca6c9e6ad569becda8da5f70461c30 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov 
-Date: Tue, 13 Feb 2024 09:25:11 +0100 -Subject: [PATCH 09/20] smbios: avoid mangling user provided tables - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [7/18] fa2c7372f55f29e5834eee94ba98f19ea02e7a82 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - currently smbios_entry_add() preserves internally '-smbios type=' - options but tables provided with '-smbios file=' are stored directly - into blob that eventually will be exposed to VM. And then later - QEMU adds default/'-smbios type' entries on top into the same blob. - - It makes impossible to generate tables more than once, hence - 'immutable' guard was used. - Make it possible to regenerate final blob by storing user provided - blobs into a dedicated area (usr_blobs) and then copy it when - composing final blob. Which also makes handling of -smbios - options consistent. - - As side effect of this and previous commits there is no need to - generate legacy smbios_entries at the time options are parsed. - Instead compose smbios_entries on demand from usr_blobs like - it is done for non-legacy SMBIOS tables. 
- - Signed-off-by: Igor Mammedov - Tested-by: Fiona Ebner - Reviewed-by: Ani Sinha - -Conflicts: hw/smbios/smbios.c - caused by downstream smbios_type2_required - -Signed-off-by: Igor Mammedov ---- - hw/smbios/smbios.c | 181 +++++++++++++++++++++++---------------------- - 1 file changed, 93 insertions(+), 88 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 0c8c439859..d8d68716d4 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -60,6 +60,14 @@ static bool smbios_uuid_encoded = true; - /* Set to true for modern Windows 10 HardwareID-6 compat */ - static bool smbios_type2_required; - -+/* -+ * SMBIOS tables provided by user with '-smbios file=' option -+ */ -+uint8_t *usr_blobs; -+size_t usr_blobs_len; -+static GArray *usr_blobs_sizes; -+static unsigned usr_table_max; -+static unsigned usr_table_cnt; - - uint8_t *smbios_tables; - size_t smbios_tables_len; -@@ -70,7 +78,6 @@ static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; - static SmbiosEntryPoint ep; - - static int smbios_type4_count = 0; --static bool smbios_immutable; - static bool smbios_have_defaults; - static uint32_t smbios_cpuid_version, smbios_cpuid_features; - -@@ -617,9 +624,8 @@ static void smbios_build_type_1_fields(void) - - uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - { -- /* drop unwanted version of command-line file blob(s) */ -- g_free(smbios_tables); -- smbios_tables = NULL; -+ int i; -+ size_t usr_offset; - - /* also complain if fields were given for types > 1 */ - if (find_next_bit(have_fields_bitmap, -@@ -629,12 +635,33 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - exit(1); - } - -- if (!smbios_immutable) { -- smbios_build_type_0_fields(); -- smbios_build_type_1_fields(); -- smbios_validate_table(expected_t4_count); -- smbios_immutable = true; -+ g_free(smbios_entries); -+ smbios_entries_len = sizeof(uint16_t); -+ smbios_entries = g_malloc0(smbios_entries_len); -+ 
-+ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; -+ i++) -+ { -+ struct smbios_table *table; -+ struct smbios_structure_header *header; -+ size_t size = g_array_index(usr_blobs_sizes, size_t, i); -+ -+ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); -+ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -+ size + sizeof(*table)); -+ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); -+ table->header.type = SMBIOS_TABLE_ENTRY; -+ table->header.length = cpu_to_le16(sizeof(*table) + size); -+ memcpy(table->data, header, size); -+ smbios_entries_len += sizeof(*table) + size; -+ (*(uint16_t *)smbios_entries) = -+ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); -+ usr_offset += size; - } -+ -+ smbios_build_type_0_fields(); -+ smbios_build_type_1_fields(); -+ smbios_validate_table(expected_t4_count); - *length = smbios_entries_len; - return smbios_entries; - } -@@ -1232,68 +1259,68 @@ void smbios_get_tables(MachineState *ms, - { - unsigned i, dimm_cnt, offset; - -- /* drop unwanted (legacy) version of command-line file blob(s) */ -- g_free(smbios_entries); -- smbios_entries = NULL; -+ g_free(smbios_tables); -+ smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); -+ smbios_tables_len = usr_blobs_len; -+ smbios_table_max = usr_table_max; -+ smbios_table_cnt = usr_table_cnt; - -- if (!smbios_immutable) { -- smbios_build_type_0_table(); -- smbios_build_type_1_table(); -- smbios_build_type_2_table(); -- smbios_build_type_3_table(); -+ smbios_build_type_0_table(); -+ smbios_build_type_1_table(); -+ smbios_build_type_2_table(); -+ smbios_build_type_3_table(); - -- assert(ms->smp.sockets >= 1); -+ assert(ms->smp.sockets >= 1); - -- for (i = 0; i < ms->smp.sockets; i++) { -- smbios_build_type_4_table(ms, i); -- } -+ for (i = 0; i < ms->smp.sockets; i++) { -+ smbios_build_type_4_table(ms, i); -+ } - -- smbios_build_type_8_table(); -- smbios_build_type_9_table(errp); -- 
smbios_build_type_11_table(); -+ smbios_build_type_8_table(); -+ smbios_build_type_9_table(errp); -+ smbios_build_type_11_table(); - - #define MAX_DIMM_SZ (16 * GiB) - #define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ - : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1) - -- dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ; -+ dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / -+ MAX_DIMM_SZ; - -- /* -- * The offset determines if we need to keep additional space between -- * table 17 and table 19 header handle numbers so that they do -- * not overlap. For example, for a VM with larger than 8 TB guest -- * memory and DIMM like chunks of 16 GiB, the default space between -- * the two tables (T19_BASE - T17_BASE = 512) is not enough. -- */ -- offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ -- dimm_cnt - (T19_BASE - T17_BASE) : 0; -+ /* -+ * The offset determines if we need to keep additional space between -+ * table 17 and table 19 header handle numbers so that they do -+ * not overlap. For example, for a VM with larger than 8 TB guest -+ * memory and DIMM like chunks of 16 GiB, the default space between -+ * the two tables (T19_BASE - T17_BASE = 512) is not enough. -+ */ -+ offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ -+ dimm_cnt - (T19_BASE - T17_BASE) : 0; - -- smbios_build_type_16_table(dimm_cnt); -+ smbios_build_type_16_table(dimm_cnt); - -- for (i = 0; i < dimm_cnt; i++) { -- smbios_build_type_17_table(i, GET_DIMM_SZ); -- } -+ for (i = 0; i < dimm_cnt; i++) { -+ smbios_build_type_17_table(i, GET_DIMM_SZ); -+ } - -- for (i = 0; i < mem_array_size; i++) { -- smbios_build_type_19_table(i, offset, mem_array[i].address, -- mem_array[i].length); -- } -+ for (i = 0; i < mem_array_size; i++) { -+ smbios_build_type_19_table(i, offset, mem_array[i].address, -+ mem_array[i].length); -+ } - -- /* -- * make sure 16 bit handle numbers in the headers of tables 19 -- * and 32 do not overlap. 
-- */ -- assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); -+ /* -+ * make sure 16 bit handle numbers in the headers of tables 19 -+ * and 32 do not overlap. -+ */ -+ assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); - -- smbios_build_type_32_table(); -- smbios_build_type_38_table(); -- smbios_build_type_41_table(errp); -- smbios_build_type_127_table(); -+ smbios_build_type_32_table(); -+ smbios_build_type_38_table(); -+ smbios_build_type_41_table(errp); -+ smbios_build_type_127_table(); - -- smbios_validate_table(ms->smp.sockets); -- smbios_entry_point_setup(); -- smbios_immutable = true; -- } -+ smbios_validate_table(ms->smp.sockets); -+ smbios_entry_point_setup(); - - /* return tables blob and entry point (anchor), and their sizes */ - *tables = smbios_tables; -@@ -1393,13 +1420,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - { - const char *val; - -- assert(!smbios_immutable); -- - val = qemu_opt_get(opts, "file"); - if (val) { - struct smbios_structure_header *header; -- int size; -- struct smbios_table *table; /* legacy mode only */ -+ size_t size; - - if (!qemu_opts_validate(opts, qemu_smbios_file_opts, errp)) { - return; -@@ -1416,9 +1440,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - * (except in legacy mode, where the second '\0' is implicit and - * will be inserted by the BIOS). 
- */ -- smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size); -- header = (struct smbios_structure_header *)(smbios_tables + -- smbios_tables_len); -+ usr_blobs = g_realloc(usr_blobs, usr_blobs_len + size); -+ header = (struct smbios_structure_header *)(usr_blobs + -+ usr_blobs_len); - - if (load_image_size(val, (uint8_t *)header, size) != size) { - error_setg(errp, "Failed to load SMBIOS file %s", val); -@@ -1439,34 +1463,15 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - smbios_type4_count++; - } - -- smbios_tables_len += size; -- if (size > smbios_table_max) { -- smbios_table_max = size; -+ if (!usr_blobs_sizes) { -+ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); - } -- smbios_table_cnt++; -- -- /* add a copy of the newly loaded blob to legacy smbios_entries */ -- /* NOTE: This code runs before smbios_set_defaults(), so we don't -- * yet know which mode (legacy vs. aggregate-table) will be -- * required. We therefore add the binary blob to both legacy -- * (smbios_entries) and aggregate (smbios_tables) tables, and -- * delete the one we don't need from smbios_set_defaults(), -- * once we know which machine version has been requested. 
-- */ -- if (!smbios_entries) { -- smbios_entries_len = sizeof(uint16_t); -- smbios_entries = g_malloc0(smbios_entries_len); -+ g_array_append_val(usr_blobs_sizes, size); -+ usr_blobs_len += size; -+ if (size > usr_table_max) { -+ usr_table_max = size; - } -- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -- size + sizeof(*table)); -- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); -- table->header.type = SMBIOS_TABLE_ENTRY; -- table->header.length = cpu_to_le16(sizeof(*table) + size); -- memcpy(table->data, header, size); -- smbios_entries_len += sizeof(*table) + size; -- (*(uint16_t *)smbios_entries) = -- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); -- /* end: add a copy of the newly loaded blob to legacy smbios_entries */ -+ usr_table_cnt++; - - return; - } --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch b/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch deleted file mode 100644 index 1dc4d22..0000000 --- a/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch +++ /dev/null @@ -1,517 +0,0 @@ -From 7ebb314a4f81d6d1a7dd4980b757fb5e556f5837 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 13 Feb 2024 16:45:18 +0100 -Subject: [PATCH 13/20] smbios: build legacy mode code only for 'pc' machine - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [11/18] 06e639be03e0d151fb9bcf5f728388edcb84219a - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - basically moving code around without functional change. - And exposing some symbols so that they could be shared - between smbbios.c and new smbios_legacy.c - - plus some meson magic to build smbios_legacy.c only - for 'pc' machine and otherwise replace it with stub - if not selected. 
- - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - -Conflicts: hw/smbios/smbios.c - context change due to downstream smbios_type2_required - -Signed-off-by: Igor Mammedov ---- - hw/i386/Kconfig | 1 + - hw/smbios/Kconfig | 2 + - hw/smbios/meson.build | 5 + - hw/smbios/smbios.c | 163 +----------------------------- - hw/smbios/smbios_legacy.c | 179 +++++++++++++++++++++++++++++++++ - hw/smbios/smbios_legacy_stub.c | 15 +++ - include/hw/firmware/smbios.h | 5 + - 7 files changed, 208 insertions(+), 162 deletions(-) - create mode 100644 hw/smbios/smbios_legacy.c - create mode 100644 hw/smbios/smbios_legacy_stub.c - -diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig -index a1846be6f7..a6ee052f9a 100644 ---- a/hw/i386/Kconfig -+++ b/hw/i386/Kconfig -@@ -76,6 +76,7 @@ config I440FX - select PIIX - select DIMM - select SMBIOS -+ select SMBIOS_LEGACY - select FW_CFG_DMA - - config ISAPC -diff --git a/hw/smbios/Kconfig b/hw/smbios/Kconfig -index 553adf4bfc..8d989a2f1b 100644 ---- a/hw/smbios/Kconfig -+++ b/hw/smbios/Kconfig -@@ -1,2 +1,4 @@ - config SMBIOS - bool -+config SMBIOS_LEGACY -+ bool -diff --git a/hw/smbios/meson.build b/hw/smbios/meson.build -index 6eeae4b35c..fcac1d7490 100644 ---- a/hw/smbios/meson.build -+++ b/hw/smbios/meson.build -@@ -4,10 +4,15 @@ smbios_ss.add(when: 'CONFIG_IPMI', - if_true: files('smbios_type_38.c'), - if_false: files('smbios_type_38-stub.c')) - -+smbios_ss.add(when: 'CONFIG_SMBIOS_LEGACY', -+ if_true: files('smbios_legacy.c'), -+ if_false: files('smbios_legacy_stub.c')) -+ - system_ss.add_all(when: 'CONFIG_SMBIOS', if_true: smbios_ss) - system_ss.add(when: 'CONFIG_SMBIOS', if_false: files('smbios-stub.c')) - - system_ss.add(when: 'CONFIG_ALL', if_true: files( - 'smbios-stub.c', - 'smbios_type_38-stub.c', -+ 'smbios_legacy_stub.c', - )) -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index eb9927335d..e40204550e 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -31,31 +31,7 @@ - #include "hw/pci/pci_device.h" - 
#include "smbios_build.h" - --/* legacy structures and constants for <= 2.0 machines */ --struct smbios_header { -- uint16_t length; -- uint8_t type; --} QEMU_PACKED; -- --struct smbios_field { -- struct smbios_header header; -- uint8_t type; -- uint16_t offset; -- uint8_t data[]; --} QEMU_PACKED; -- --struct smbios_table { -- struct smbios_header header; -- uint8_t data[]; --} QEMU_PACKED; -- --#define SMBIOS_FIELD_ENTRY 0 --#define SMBIOS_TABLE_ENTRY 1 -- --static uint8_t *smbios_entries; --static size_t smbios_entries_len; - static bool smbios_uuid_encoded = true; --/* end: legacy structures & constants for <= 2.0 machines */ - - /* Set to true for modern Windows 10 HardwareID-6 compat */ - static bool smbios_type2_required; -@@ -65,7 +41,6 @@ static bool smbios_type2_required; - */ - uint8_t *usr_blobs; - size_t usr_blobs_len; --static GArray *usr_blobs_sizes; - static unsigned usr_table_max; - static unsigned usr_table_cnt; - -@@ -531,7 +506,7 @@ static void smbios_check_type4_count(uint32_t expected_t4_count) - } - } - --static void smbios_validate_table(void) -+void smbios_validate_table(void) - { - if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && - smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { -@@ -541,134 +516,6 @@ static void smbios_validate_table(void) - } - } - -- --/* legacy setup functions for <= 2.0 machines */ --static void smbios_add_field(int type, int offset, const void *data, size_t len) --{ -- struct smbios_field *field; -- -- if (!smbios_entries) { -- smbios_entries_len = sizeof(uint16_t); -- smbios_entries = g_malloc0(smbios_entries_len); -- } -- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -- sizeof(*field) + len); -- field = (struct smbios_field *)(smbios_entries + smbios_entries_len); -- field->header.type = SMBIOS_FIELD_ENTRY; -- field->header.length = cpu_to_le16(sizeof(*field) + len); -- -- field->type = type; -- field->offset = cpu_to_le16(offset); -- memcpy(field->data, data, len); -- -- smbios_entries_len += 
sizeof(*field) + len; -- (*(uint16_t *)smbios_entries) = -- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); --} -- --static void smbios_maybe_add_str(int type, int offset, const char *data) --{ -- if (data) { -- smbios_add_field(type, offset, data, strlen(data) + 1); -- } --} -- --static void smbios_build_type_0_fields(void) --{ -- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), -- smbios_type0.vendor); -- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), -- smbios_type0.version); -- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, -- bios_release_date_str), -- smbios_type0.date); -- if (smbios_type0.have_major_minor) { -- smbios_add_field(0, offsetof(struct smbios_type_0, -- system_bios_major_release), -- &smbios_type0.major, 1); -- smbios_add_field(0, offsetof(struct smbios_type_0, -- system_bios_minor_release), -- &smbios_type0.minor, 1); -- } --} -- --static void smbios_build_type_1_fields(void) --{ -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), -- smbios_type1.manufacturer); -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), -- smbios_type1.product); -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), -- smbios_type1.version); -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), -- smbios_type1.serial); -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), -- smbios_type1.sku); -- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), -- smbios_type1.family); -- if (qemu_uuid_set) { -- /* We don't encode the UUID in the "wire format" here because this -- * function is for legacy mode and needs to keep the guest ABI, and -- * because we don't know what's the SMBIOS version advertised by the -- * BIOS. 
-- */ -- smbios_add_field(1, offsetof(struct smbios_type_1, uuid), -- &qemu_uuid, 16); -- } --} -- --uint8_t *smbios_get_table_legacy(size_t *length) --{ -- int i; -- size_t usr_offset; -- -- /* also complain if fields were given for types > 1 */ -- if (find_next_bit(smbios_have_fields_bitmap, -- SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { -- error_report("can't process fields for smbios " -- "types > 1 on machine versions < 2.1!"); -- exit(1); -- } -- -- if (test_bit(4, smbios_have_binfile_bitmap)) { -- error_report("can't process table for smbios " -- "type 4 on machine versions < 2.1!"); -- exit(1); -- } -- -- g_free(smbios_entries); -- smbios_entries_len = sizeof(uint16_t); -- smbios_entries = g_malloc0(smbios_entries_len); -- -- for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; -- i++) -- { -- struct smbios_table *table; -- struct smbios_structure_header *header; -- size_t size = g_array_index(usr_blobs_sizes, size_t, i); -- -- header = (struct smbios_structure_header *)(usr_blobs + usr_offset); -- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -- size + sizeof(*table)); -- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); -- table->header.type = SMBIOS_TABLE_ENTRY; -- table->header.length = cpu_to_le16(sizeof(*table) + size); -- memcpy(table->data, header, size); -- smbios_entries_len += sizeof(*table) + size; -- (*(uint16_t *)smbios_entries) = -- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); -- usr_offset += size; -- } -- -- smbios_build_type_0_fields(); -- smbios_build_type_1_fields(); -- smbios_validate_table(); -- *length = smbios_entries_len; -- return smbios_entries; --} --/* end: legacy setup functions for <= 2.0 machines */ -- -- - bool smbios_skip_table(uint8_t type, bool required_table) - { - if (test_bit(type, smbios_have_binfile_bitmap)) { -@@ -1418,14 +1265,6 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, - return true; - } - --static void 
smbios_add_usr_blob_size(size_t size) --{ -- if (!usr_blobs_sizes) { -- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); -- } -- g_array_append_val(usr_blobs_sizes, size); --} -- - void smbios_entry_add(QemuOpts *opts, Error **errp) - { - const char *val; -diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c -new file mode 100644 -index 0000000000..21f143e738 ---- /dev/null -+++ b/hw/smbios/smbios_legacy.c -@@ -0,0 +1,179 @@ -+/* -+ * SMBIOS legacy support -+ * -+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. -+ * Copyright (C) 2013 Red Hat, Inc. -+ * -+ * Authors: -+ * Alex Williamson -+ * Markus Armbruster -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. See -+ * the COPYING file in the top-level directory. -+ * -+ * Contributions after 2012-01-13 are licensed under the terms of the -+ * GNU GPL, version 2 or (at your option) any later version. -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/bswap.h" -+#include "hw/firmware/smbios.h" -+#include "sysemu/sysemu.h" -+#include "qemu/error-report.h" -+ -+struct smbios_header { -+ uint16_t length; -+ uint8_t type; -+} QEMU_PACKED; -+ -+struct smbios_field { -+ struct smbios_header header; -+ uint8_t type; -+ uint16_t offset; -+ uint8_t data[]; -+} QEMU_PACKED; -+ -+struct smbios_table { -+ struct smbios_header header; -+ uint8_t data[]; -+} QEMU_PACKED; -+ -+#define SMBIOS_FIELD_ENTRY 0 -+#define SMBIOS_TABLE_ENTRY 1 -+ -+static uint8_t *smbios_entries; -+static size_t smbios_entries_len; -+GArray *usr_blobs_sizes; -+ -+void smbios_add_usr_blob_size(size_t size) -+{ -+ if (!usr_blobs_sizes) { -+ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); -+ } -+ g_array_append_val(usr_blobs_sizes, size); -+} -+ -+static void smbios_add_field(int type, int offset, const void *data, size_t len) -+{ -+ struct smbios_field *field; -+ -+ if (!smbios_entries) { -+ smbios_entries_len = sizeof(uint16_t); -+ smbios_entries = g_malloc0(smbios_entries_len); 
-+ } -+ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -+ sizeof(*field) + len); -+ field = (struct smbios_field *)(smbios_entries + smbios_entries_len); -+ field->header.type = SMBIOS_FIELD_ENTRY; -+ field->header.length = cpu_to_le16(sizeof(*field) + len); -+ -+ field->type = type; -+ field->offset = cpu_to_le16(offset); -+ memcpy(field->data, data, len); -+ -+ smbios_entries_len += sizeof(*field) + len; -+ (*(uint16_t *)smbios_entries) = -+ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); -+} -+ -+static void smbios_maybe_add_str(int type, int offset, const char *data) -+{ -+ if (data) { -+ smbios_add_field(type, offset, data, strlen(data) + 1); -+ } -+} -+ -+static void smbios_build_type_0_fields(void) -+{ -+ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), -+ smbios_type0.vendor); -+ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), -+ smbios_type0.version); -+ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, -+ bios_release_date_str), -+ smbios_type0.date); -+ if (smbios_type0.have_major_minor) { -+ smbios_add_field(0, offsetof(struct smbios_type_0, -+ system_bios_major_release), -+ &smbios_type0.major, 1); -+ smbios_add_field(0, offsetof(struct smbios_type_0, -+ system_bios_minor_release), -+ &smbios_type0.minor, 1); -+ } -+} -+ -+static void smbios_build_type_1_fields(void) -+{ -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), -+ smbios_type1.manufacturer); -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), -+ smbios_type1.product); -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), -+ smbios_type1.version); -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), -+ smbios_type1.serial); -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), -+ smbios_type1.sku); -+ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), -+ smbios_type1.family); -+ if 
(qemu_uuid_set) { -+ /* -+ * We don't encode the UUID in the "wire format" here because this -+ * function is for legacy mode and needs to keep the guest ABI, and -+ * because we don't know what's the SMBIOS version advertised by the -+ * BIOS. -+ */ -+ smbios_add_field(1, offsetof(struct smbios_type_1, uuid), -+ &qemu_uuid, 16); -+ } -+} -+ -+uint8_t *smbios_get_table_legacy(size_t *length) -+{ -+ int i; -+ size_t usr_offset; -+ -+ /* complain if fields were given for types > 1 */ -+ if (find_next_bit(smbios_have_fields_bitmap, -+ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { -+ error_report("can't process fields for smbios " -+ "types > 1 on machine versions < 2.1!"); -+ exit(1); -+ } -+ -+ if (test_bit(4, smbios_have_binfile_bitmap)) { -+ error_report("can't process table for smbios " -+ "type 4 on machine versions < 2.1!"); -+ exit(1); -+ } -+ -+ g_free(smbios_entries); -+ smbios_entries_len = sizeof(uint16_t); -+ smbios_entries = g_malloc0(smbios_entries_len); -+ -+ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; -+ i++) -+ { -+ struct smbios_table *table; -+ struct smbios_structure_header *header; -+ size_t size = g_array_index(usr_blobs_sizes, size_t, i); -+ -+ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); -+ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + -+ size + sizeof(*table)); -+ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); -+ table->header.type = SMBIOS_TABLE_ENTRY; -+ table->header.length = cpu_to_le16(sizeof(*table) + size); -+ memcpy(table->data, header, size); -+ smbios_entries_len += sizeof(*table) + size; -+ (*(uint16_t *)smbios_entries) = -+ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); -+ usr_offset += size; -+ } -+ -+ smbios_build_type_0_fields(); -+ smbios_build_type_1_fields(); -+ smbios_validate_table(); -+ *length = smbios_entries_len; -+ return smbios_entries; -+} -diff --git a/hw/smbios/smbios_legacy_stub.c 
b/hw/smbios/smbios_legacy_stub.c -new file mode 100644 -index 0000000000..f29b15316c ---- /dev/null -+++ b/hw/smbios/smbios_legacy_stub.c -@@ -0,0 +1,15 @@ -+/* -+ * IPMI SMBIOS firmware handling -+ * -+ * Copyright (c) 2024 Igor Mammedov, Red Hat, Inc. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include "hw/firmware/smbios.h" -+ -+void smbios_add_usr_blob_size(size_t size) -+{ -+} -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 333de0d5fc..92e9aba415 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -17,6 +17,9 @@ - * - */ - -+extern uint8_t *usr_blobs; -+extern GArray *usr_blobs_sizes; -+ - typedef struct { - const char *vendor, *version, *date; - bool have_major_minor, uefi; -@@ -323,6 +326,8 @@ struct smbios_type_127 { - struct smbios_structure_header header; - } QEMU_PACKED; - -+void smbios_validate_table(void); -+void smbios_add_usr_blob_size(size_t size); - void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, uint32_t features); - void smbios_set_defaults(const char *manufacturer, const char *product, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch b/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch deleted file mode 100644 index 48a9e16..0000000 --- a/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 07f6ef2d032cda3e746ac2477c0a9bc1ac636f45 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 29 Dec 2023 14:08:13 +0100 -Subject: [PATCH 06/20] smbios: cleanup smbios_get_tables() from legacy - handling - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha 
-RH-Commit: [4/18] a5e09ce1df72293fecad863edd146a8c4b1a734f - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -smbios_get_tables() bails out right away if leagacy mode is enabled -and won't generate any SMBIOS tables. At the same time x86 specific -fw_cfg_build_smbios() will genarate legacy tables and then proceed -to preparing temporary mem_array for useless call to -smbios_get_tables() and then discard it. - -Drop legacy related check in smbios_get_tables() and return from -fw_cfg_build_smbios() early if legacy tables where built without -proceeding to non legacy part of the function. - -Signed-off-by: Igor Mammedov -Reviewed-by: Ani Sinha -Tested-by: Fiona Ebner ---- - hw/i386/fw_cfg.c | 1 + - hw/smbios/smbios.c | 6 ------ - 2 files changed, 1 insertion(+), 6 deletions(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index 6a5466faf0..ed72b1442d 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -76,6 +76,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - if (smbios_tables) { - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); -+ return; - } - - /* build the array of physical mem area from e820 table */ -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 074705fa4c..b13e40bae2 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -1244,12 +1244,6 @@ void smbios_get_tables(MachineState *ms, - { - unsigned i, dimm_cnt, offset; - -- if (smbios_legacy) { -- *tables = *anchor = NULL; -- *tables_len = *anchor_len = 0; -- return; -- } -- - if (!smbios_immutable) { - smbios_build_type_0_table(); - smbios_build_type_1_table(); --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch b/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch deleted file mode 100644 index 38a7f95..0000000 --- a/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 
c0282a842a912aacac28a6ae229de5854c3fb5df Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 26 Feb 2024 13:20:22 +0100 -Subject: [PATCH 16/20] smbios: clear smbios_type4_count before building tables - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [14/18] f809595d2934ae975c0b7d17a4a79645e062ba42 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -it will help to keep type 4 tables accounting correct in case -SMBIOS tables are built multiple times. - -Signed-off-by: Igor Mammedov -Tested-by: Fiona Ebner ---- - hw/smbios/smbios.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7e32430b85..4521ea386c 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -1111,6 +1111,7 @@ void smbios_get_tables(MachineState *ms, - ep_type == SMBIOS_ENTRY_POINT_TYPE_64); - - g_free(smbios_tables); -+ smbios_type4_count = 0; - smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); - smbios_tables_len = usr_blobs_len; - smbios_table_max = usr_table_max; --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch b/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch deleted file mode 100644 index a2c9532..0000000 --- a/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 2b76d95ec07aba6d96070ee90c5015c1676be091 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 13 Feb 2024 16:25:54 +0100 -Subject: [PATCH 10/20] smbios: don't check type4 structures in legacy mode - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [8/18] c1f8409ea0d916f333c9373535bf21b521c62855 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - 
- legacy mode doesn't support structures of type 2 and more, - and CLI has a check for '-smbios type' option, however it's - still possible to sneak in type4 as a blob with '-smbios file' - option. However doing the later makes SMBIOS tables broken - since SeaBIOS doesn't expect that. - - Rather than trying to add support for type4 to legacy code - (both QEMU and SeaBIOS), simplify smbios_get_table_legacy() - by dropping not relevant check in legacy code and error out - on type4 blob. - - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - Tested-by: Fiona Ebner - -Conflicts: include/hw/firmware/smbios.h -Signed-off-by: Igor Mammedov - - Please enter the commit message for your changes. Lines starting ---- - hw/i386/fw_cfg.c | 3 +-- - hw/smbios/smbios.c | 18 ++++++++++++++---- - include/hw/firmware/smbios.h | 2 +- - 3 files changed, 16 insertions(+), 7 deletions(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index bb7149c4c3..a25793a68f 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -73,8 +73,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - - if (pcmc->smbios_legacy_mode) { -- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, -- &smbios_tables_len); -+ smbios_tables = smbios_get_table_legacy(&smbios_tables_len); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); - return; -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index d8d68716d4..441517cf24 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -530,14 +530,17 @@ opts_init(smbios_register_config); - */ - #define SMBIOS_21_MAX_TABLES_LEN 0xffff - --static void smbios_validate_table(uint32_t expected_t4_count) -+static void smbios_check_type4_count(uint32_t expected_t4_count) - { - if (smbios_type4_count && smbios_type4_count != expected_t4_count) { - error_report("Expected %d SMBIOS Type 4 tables, got %d instead", - 
expected_t4_count, smbios_type4_count); - exit(1); - } -+} - -+static void smbios_validate_table(void) -+{ - if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && - smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { - error_report("SMBIOS 2.1 table length %zu exceeds %d", -@@ -622,7 +625,7 @@ static void smbios_build_type_1_fields(void) - } - } - --uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) -+uint8_t *smbios_get_table_legacy(size_t *length) - { - int i; - size_t usr_offset; -@@ -635,6 +638,12 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - exit(1); - } - -+ if (test_bit(4, have_binfile_bitmap)) { -+ error_report("can't process table for smbios " -+ "type 4 on machine versions < 2.1!"); -+ exit(1); -+ } -+ - g_free(smbios_entries); - smbios_entries_len = sizeof(uint16_t); - smbios_entries = g_malloc0(smbios_entries_len); -@@ -661,7 +670,7 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - - smbios_build_type_0_fields(); - smbios_build_type_1_fields(); -- smbios_validate_table(expected_t4_count); -+ smbios_validate_table(); - *length = smbios_entries_len; - return smbios_entries; - } -@@ -1319,7 +1328,8 @@ void smbios_get_tables(MachineState *ms, - smbios_build_type_41_table(errp); - smbios_build_type_127_table(); - -- smbios_validate_table(ms->smp.sockets); -+ smbios_check_type4_count(ms->smp.sockets); -+ smbios_validate_table(); - smbios_entry_point_setup(); - - /* return tables blob and entry point (anchor), and their sizes */ -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index b9fc9a0f42..d55018e5e3 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - SmbiosEntryPointType ep_type, - const char *stream_product, - const char *stream_version); --uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); 
-+uint8_t *smbios_get_table_legacy(size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch b/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch deleted file mode 100644 index 2afbe66..0000000 --- a/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch +++ /dev/null @@ -1,72 +0,0 @@ -From bbb2d260e6f33380b9df28c74421055bd8dccda5 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 26 Feb 2024 12:59:33 +0100 -Subject: [PATCH 19/20] smbios: error out when building type 4 table is not - possible - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [17/18] 86b1c67bfbe9c0c14a190cd1204b6ccd1de1630f - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -If SMBIOS v2 version is requested but number of cores/threads -are more than it's possible to describe with v2, error out -instead of silently ignoring the fact and filling core/thread -count with bogus values. 
- -This will help caller to decide if it should fallback to -SMBIOSv3 when smbios-entry-point-type='auto' - -Signed-off-by: Igor Mammedov -Reviewed-by: Ani Sinha -Tested-by: Fiona Ebner ---- - hw/smbios/smbios.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 3d9dcb0d31..637aa952f5 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -655,7 +655,8 @@ static void smbios_build_type_3_table(void) - } - - static void smbios_build_type_4_table(MachineState *ms, unsigned instance, -- SmbiosEntryPointType ep_type) -+ SmbiosEntryPointType ep_type, -+ Error **errp) - { - char sock_str[128]; - size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; -@@ -709,6 +710,12 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance, - if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { - t->core_count2 = t->core_enabled2 = cpu_to_le16(cores_per_socket); - t->thread_count2 = cpu_to_le16(threads_per_socket); -+ } else if (t->core_count == 0xFF || t->thread_count == 0xFF) { -+ error_setg(errp, "SMBIOS 2.0 doesn't support number of processor " -+ "cores/threads more than 255, use " -+ "-machine smbios-entry-point-type=64 option to enable " -+ "SMBIOS 3.0 support"); -+ return; - } - - SMBIOS_BUILD_TABLE_POST; -@@ -1126,7 +1133,10 @@ static bool smbios_get_tables_ep(MachineState *ms, - assert(ms->smp.sockets >= 1); - - for (i = 0; i < ms->smp.sockets; i++) { -- smbios_build_type_4_table(ms, i, ep_type); -+ smbios_build_type_4_table(ms, i, ep_type, errp); -+ if (*errp) { -+ goto err_exit; -+ } - } - - smbios_build_type_8_table(); --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch b/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch deleted file mode 100644 index db012bd..0000000 --- a/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch +++ /dev/null @@ -1,48 +0,0 @@ -From c083959c963fde33f4769fd4c6e122dd16ce6d3c Mon Sep 
17 00:00:00 2001 -From: Igor Mammedov -Date: Wed, 21 Feb 2024 16:04:36 +0100 -Subject: [PATCH 17/20] smbios: extend smbios-entry-point-type with 'auto' - value - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [15/18] 202563dcf77e062a238aa2a10ec14c25d3f5a7d0 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -later patches will use it to pick SMBIOS version at runtime -depending on configuration. - -Signed-off-by: Igor Mammedov -Acked-by: Markus Armbruster -Reviewed-by: Ani Sinha -Tested-by: Fiona Ebner ---- - qapi/machine.json | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/qapi/machine.json b/qapi/machine.json -index b6d634b30d..99f6368fa6 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -1788,10 +1788,13 @@ - # - # @64: SMBIOS version 3.0 (64-bit) Entry Point - # -+# @auto: Either 2.x or 3.x SMBIOS version, 2.x if configuration can be -+# described by it and 3.x otherwise (since: 9.0) -+# - # Since: 7.0 - ## - { 'enum': 'SmbiosEntryPointType', -- 'data': [ '32', '64' ] } -+ 'data': [ '32', '64', 'auto' ] } - - ## - # @MemorySizeConfiguration: --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch b/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch deleted file mode 100644 index 1896f2b..0000000 --- a/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch +++ /dev/null @@ -1,281 +0,0 @@ -From be0abbf3f7845847b46486704c46c5de5a2b2323 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 26 Feb 2024 13:49:14 +0100 -Subject: [PATCH 15/20] smbios: get rid of global smbios_ep_type - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [13/18] 2e838ed0d03989e2e4ee08041b5ba64d5d7f5820 - -JIRA: 
https://issues.redhat.com/browse/RHEL-21705 - -Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/riscv/virt.c, hw/smbios/smbios.c, - include/hw/firmware/smbios.h - due to downstream specific smbios_set_defaults() - -Signed-off-by: Igor Mammedov -Acked-by: Daniel Henrique Barboza -Reviewed-by: Ani Sinha -Tested-by: Fiona Ebner -Signed-off-by: Igor Mammedov ---- - hw/arm/virt.c | 4 ++-- - hw/i386/fw_cfg.c | 6 +++--- - hw/i386/fw_cfg.h | 3 ++- - hw/i386/pc.c | 2 +- - hw/loongarch/virt.c | 7 ++++--- - hw/smbios/smbios.c | 26 ++++++++++++++------------ - hw/smbios/smbios_legacy.c | 2 +- - include/hw/firmware/smbios.h | 4 ++-- - 8 files changed, 29 insertions(+), 25 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e5cfc19c08..e4a66affcb 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1695,14 +1695,14 @@ static void virt_build_smbios(VirtMachineState *vms) - - smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? "1.0" : mc->name, -- true, SMBIOS_ENTRY_POINT_TYPE_64, -+ true, - NULL, NULL); - - /* build the array of physical mem area from base_memmap */ - mem_array.address = vms->memmap[VIRT_MEM].base; - mem_array.length = ms->ram_size; - -- smbios_get_tables(ms, &mem_array, 1, -+ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, &mem_array, 1, - &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len, - &error_fatal); -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index bdc3cc4556..58429bb78d 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -48,7 +48,8 @@ const char *fw_cfg_arch_key_name(uint16_t key) - return NULL; - } - --void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) -+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, -+ SmbiosEntryPointType ep_type) - { - #ifdef CONFIG_SMBIOS - uint8_t *smbios_tables, *smbios_anchor; -@@ -64,7 +65,6 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - /* These values are guest ABI, do not change */ - 
smbios_set_defaults("QEMU", mc->desc, mc->name, - pcmc->smbios_uuid_encoded, -- pcms->smbios_entry_point_type, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version); - } -@@ -91,7 +91,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - array_count++; - } - } -- smbios_get_tables(ms, mem_array, array_count, -+ smbios_get_tables(ms, ep_type, mem_array, array_count, - &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len, - &error_fatal); -diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h -index 1e1de6b4a3..92e310f5fd 100644 ---- a/hw/i386/fw_cfg.h -+++ b/hw/i386/fw_cfg.h -@@ -23,7 +23,8 @@ - FWCfgState *fw_cfg_arch_create(MachineState *ms, - uint16_t boot_cpus, - uint16_t apic_id_limit); --void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); -+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, -+ SmbiosEntryPointType ep_type); - void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); - void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 16de2a59e8..ae6777fc1a 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) - - acpi_setup(); - if (x86ms->fw_cfg) { -- fw_cfg_build_smbios(pcms, x86ms->fw_cfg); -+ fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type); - fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); - /* update FW_CFG_NB_CPUS to account for -device added CPUs */ - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); -diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c -index 7358a023d3..77956b5ada 100644 ---- a/hw/loongarch/virt.c -+++ b/hw/loongarch/virt.c -@@ -320,10 +320,11 @@ static void virt_build_smbios(LoongArchMachineState *lams) - return; - } - -- smbios_set_defaults("QEMU", product, mc->name, -- true, SMBIOS_ENTRY_POINT_TYPE_64); -+ smbios_set_defaults("QEMU", product, mc->name, true); - -- 
smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, -+ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, -+ NULL, 0, -+ &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len, &error_fatal); - - if (smbios_anchor) { -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index b5745c6c2d..7e32430b85 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -47,7 +47,6 @@ uint8_t *smbios_tables; - size_t smbios_tables_len; - unsigned smbios_table_max; - unsigned smbios_table_cnt; --static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; - - static SmbiosEntryPoint ep; - -@@ -506,9 +505,9 @@ static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) - return true; - } - --bool smbios_validate_table(Error **errp) -+bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp) - { -- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && -+ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && - smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { - error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", - smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); -@@ -655,14 +654,15 @@ static void smbios_build_type_3_table(void) - SMBIOS_BUILD_TABLE_POST; - } - --static void smbios_build_type_4_table(MachineState *ms, unsigned instance) -+static void smbios_build_type_4_table(MachineState *ms, unsigned instance, -+ SmbiosEntryPointType ep_type) - { - char sock_str[128]; - size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; - unsigned threads_per_socket; - unsigned cores_per_socket; - -- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { -+ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { - tbl_len = SMBIOS_TYPE_4_LEN_V30; - } - -@@ -991,13 +991,11 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, - bool uuid_encoded, -- SmbiosEntryPointType ep_type, - const char *stream_product, - const char *stream_version) - { - 
smbios_have_defaults = true; - smbios_uuid_encoded = uuid_encoded; -- smbios_ep_type = ep_type; - - /* - * If @stream_product & @stream_version are non-NULL, then -@@ -1048,9 +1046,9 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - SMBIOS_SET_DEFAULT(type17.manufacturer, manufacturer); - } - --static void smbios_entry_point_setup(void) -+static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) - { -- switch (smbios_ep_type) { -+ switch (ep_type) { - case SMBIOS_ENTRY_POINT_TYPE_32: - memcpy(ep.ep21.anchor_string, "_SM_", 4); - memcpy(ep.ep21.intermediate_anchor_string, "_DMI_", 5); -@@ -1100,6 +1098,7 @@ static void smbios_entry_point_setup(void) - } - - void smbios_get_tables(MachineState *ms, -+ SmbiosEntryPointType ep_type, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, - uint8_t **tables, size_t *tables_len, -@@ -1108,6 +1107,9 @@ void smbios_get_tables(MachineState *ms, - { - unsigned i, dimm_cnt, offset; - -+ assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || -+ ep_type == SMBIOS_ENTRY_POINT_TYPE_64); -+ - g_free(smbios_tables); - smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); - smbios_tables_len = usr_blobs_len; -@@ -1122,7 +1124,7 @@ void smbios_get_tables(MachineState *ms, - assert(ms->smp.sockets >= 1); - - for (i = 0; i < ms->smp.sockets; i++) { -- smbios_build_type_4_table(ms, i); -+ smbios_build_type_4_table(ms, i, ep_type); - } - - smbios_build_type_8_table(); -@@ -1171,10 +1173,10 @@ void smbios_get_tables(MachineState *ms, - if (!smbios_check_type4_count(ms->smp.sockets, errp)) { - goto err_exit; - } -- if (!smbios_validate_table(errp)) { -+ if (!smbios_validate_table(ep_type, errp)) { - goto err_exit; - } -- smbios_entry_point_setup(); -+ smbios_entry_point_setup(ep_type); - - /* return tables blob and entry point (anchor), and their sizes */ - *tables = smbios_tables; -diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c -index a6544bf55a..06907cd16c 
100644 ---- a/hw/smbios/smbios_legacy.c -+++ b/hw/smbios/smbios_legacy.c -@@ -173,7 +173,7 @@ uint8_t *smbios_get_table_legacy(size_t *length, Error **errp) - - smbios_build_type_0_fields(); - smbios_build_type_1_fields(); -- if (!smbios_validate_table(errp)) { -+ if (!smbios_validate_table(SMBIOS_ENTRY_POINT_TYPE_32, errp)) { - goto err_exit; - } - -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 44af3a0d82..781298f594 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -326,18 +326,18 @@ struct smbios_type_127 { - struct smbios_structure_header header; - } QEMU_PACKED; - --bool smbios_validate_table(Error **errp); -+bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp); - void smbios_add_usr_blob_size(size_t size); - void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, uint32_t features); - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, - bool uuid_encoded, -- SmbiosEntryPointType ep_type, - const char *stream_product, - const char *stream_version); - uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); - void smbios_get_tables(MachineState *ms, -+ SmbiosEntryPointType ep_type, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, - uint8_t **tables, size_t *tables_len, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch deleted file mode 100644 index ecb730e..0000000 --- a/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 0802fa7199c8085d018fc38dd4beaa5062d383d1 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 29 Dec 2023 15:37:29 +0100 -Subject: [PATCH 08/20] smbios: get rid of smbios_legacy global - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point 
with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [6/18] 684dd1dca8d611c6de97b26ef8c1cda6ca509d54 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - clean up smbios_set_defaults() which is reused by legacy - and non legacy machines from being aware of 'legacy' notion - and need to turn it off. And push legacy handling up to - PC machine code where it's relevant. - - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - Acked-by: Daniel Henrique Barboza - Tested-by: Fiona Ebner - -Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/loongarch/virt.c, - hw/smbios/smbios.c, include/hw/firmware/smbios.h - due to downstream specifc signature of smbios_set_defaults() -PS: - while fixing conflicts move RHEL specific - smbios_stream_product/smbios_stream_version - at the end of arguments list - -Signed-off-by: Igor Mammedov ---- - hw/arm/virt.c | 5 +++-- - hw/i386/fw_cfg.c | 11 +++++----- - hw/loongarch/virt.c | 2 +- - hw/smbios/smbios.c | 39 ++++++++++++++++-------------------- - include/hw/firmware/smbios.h | 6 +++--- - 5 files changed, 30 insertions(+), 33 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 943c563391..e5cfc19c08 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1694,8 +1694,9 @@ static void virt_build_smbios(VirtMachineState *vms) - } - - smbios_set_defaults("QEMU", product, -- vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, -- true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); -+ vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, -+ true, SMBIOS_ENTRY_POINT_TYPE_64, -+ NULL, NULL); - - /* build the array of physical mem area from base_memmap */ - mem_array.address = vms->memmap[VIRT_MEM].base; -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index 79ff7f7225..bb7149c4c3 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -63,17 +63,18 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ - smbios_set_defaults("QEMU", mc->desc, mc->name, -- pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, -+ pcmc->smbios_uuid_encoded, -+ pcms->smbios_entry_point_type, - pcmc->smbios_stream_product, -- pcmc->smbios_stream_version, -- pcms->smbios_entry_point_type); -+ pcmc->smbios_stream_version); - } - - /* tell smbios about cpuid version and features */ - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - -- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); -- if (smbios_tables) { -+ if (pcmc->smbios_legacy_mode) { -+ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, -+ &smbios_tables_len); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); - return; -diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c -index 4b7dc67a2d..7358a023d3 100644 ---- a/hw/loongarch/virt.c -+++ b/hw/loongarch/virt.c -@@ -320,7 +320,7 @@ static void virt_build_smbios(LoongArchMachineState *lams) - return; - } - -- smbios_set_defaults("QEMU", product, mc->name, false, -+ smbios_set_defaults("QEMU", product, mc->name, - true, SMBIOS_ENTRY_POINT_TYPE_64); - - smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 8129d396d1..0c8c439859 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -54,7 +54,6 @@ struct smbios_table { - - static uint8_t *smbios_entries; - static size_t smbios_entries_len; --static bool smbios_legacy = true; - static 
bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ - -@@ -618,9 +617,16 @@ static void smbios_build_type_1_fields(void) - - uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - { -- if (!smbios_legacy) { -- *length = 0; -- return NULL; -+ /* drop unwanted version of command-line file blob(s) */ -+ g_free(smbios_tables); -+ smbios_tables = NULL; -+ -+ /* also complain if fields were given for types > 1 */ -+ if (find_next_bit(have_fields_bitmap, -+ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { -+ error_report("can't process fields for smbios " -+ "types > 1 on machine versions < 2.1!"); -+ exit(1); - } - - if (!smbios_immutable) { -@@ -1107,31 +1113,16 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) - } - - void smbios_set_defaults(const char *manufacturer, const char *product, -- const char *version, bool legacy_mode, -+ const char *version, - bool uuid_encoded, -+ SmbiosEntryPointType ep_type, - const char *stream_product, -- const char *stream_version, -- SmbiosEntryPointType ep_type) -+ const char *stream_version) - { - smbios_have_defaults = true; -- smbios_legacy = legacy_mode; - smbios_uuid_encoded = uuid_encoded; - smbios_ep_type = ep_type; - -- /* drop unwanted version of command-line file blob(s) */ -- if (smbios_legacy) { -- g_free(smbios_tables); -- /* in legacy mode, also complain if fields were given for types > 1 */ -- if (find_next_bit(have_fields_bitmap, -- SMBIOS_MAX_TYPE+1, 2) < SMBIOS_MAX_TYPE+1) { -- error_report("can't process fields for smbios " -- "types > 1 on machine versions < 2.1!"); -- exit(1); -- } -- } else { -- g_free(smbios_entries); -- } -- - /* - * If @stream_product & @stream_version are non-NULL, then - * we're following rules for new Windows driver support. 
-@@ -1241,6 +1232,10 @@ void smbios_get_tables(MachineState *ms, - { - unsigned i, dimm_cnt, offset; - -+ /* drop unwanted (legacy) version of command-line file blob(s) */ -+ g_free(smbios_entries); -+ smbios_entries = NULL; -+ - if (!smbios_immutable) { - smbios_build_type_0_table(); - smbios_build_type_1_table(); -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 95ec64ce2c..b9fc9a0f42 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -310,11 +310,11 @@ struct smbios_type_127 { - void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, uint32_t features); - void smbios_set_defaults(const char *manufacturer, const char *product, -- const char *version, bool legacy_mode, -+ const char *version, - bool uuid_encoded, -+ SmbiosEntryPointType ep_type, - const char *stream_product, -- const char *stream_version, -- SmbiosEntryPointType ep_type); -+ const char *stream_version); - uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch deleted file mode 100644 index fde17ac..0000000 --- a/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 1536f1ec00ddc1729854b381cc0d54814bb6c19f Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 29 Dec 2023 15:04:55 +0100 -Subject: [PATCH 07/20] smbios: get rid of smbios_smp_sockets global - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [5/18] f59bfeb547b7febcf1de2b6179af006e7fa0bccd - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - it makes smbios_validate_table() 
independent from - smbios_smp_sockets global, which in turn lets - smbios_get_tables() avoid using not related legacy code. - - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - Tested-by: Fiona Ebner - -Conflicts: - include/hw/firmware/smbios.h due to down-stream - (d9ff466c980d Machine type related general changes) - adding custom stream_product/stream_version - -Signed-off-by: Igor Mammedov ---- - hw/i386/fw_cfg.c | 2 +- - hw/smbios/smbios.c | 22 +++++++++------------- - include/hw/firmware/smbios.h | 2 +- - 3 files changed, 11 insertions(+), 15 deletions(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index ed72b1442d..79ff7f7225 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -72,7 +72,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - /* tell smbios about cpuid version and features */ - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - -- smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); -+ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); - if (smbios_tables) { - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index b13e40bae2..8129d396d1 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -73,7 +73,7 @@ static SmbiosEntryPoint ep; - static int smbios_type4_count = 0; - static bool smbios_immutable; - static bool smbios_have_defaults; --static uint32_t smbios_cpuid_version, smbios_cpuid_features, smbios_smp_sockets; -+static uint32_t smbios_cpuid_version, smbios_cpuid_features; - - static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); - static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); -@@ -524,14 +524,11 @@ opts_init(smbios_register_config); - */ - #define SMBIOS_21_MAX_TABLES_LEN 0xffff - --static void smbios_validate_table(MachineState *ms) -+static void smbios_validate_table(uint32_t expected_t4_count) - { -- uint32_t 
expect_t4_count = smbios_legacy ? -- ms->smp.cpus : smbios_smp_sockets; -- -- if (smbios_type4_count && smbios_type4_count != expect_t4_count) { -+ if (smbios_type4_count && smbios_type4_count != expected_t4_count) { - error_report("Expected %d SMBIOS Type 4 tables, got %d instead", -- expect_t4_count, smbios_type4_count); -+ expected_t4_count, smbios_type4_count); - exit(1); - } - -@@ -619,7 +616,7 @@ static void smbios_build_type_1_fields(void) - } - } - --uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) -+uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) - { - if (!smbios_legacy) { - *length = 0; -@@ -629,7 +626,7 @@ uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) - if (!smbios_immutable) { - smbios_build_type_0_fields(); - smbios_build_type_1_fields(); -- smbios_validate_table(ms); -+ smbios_validate_table(expected_t4_count); - smbios_immutable = true; - } - *length = smbios_entries_len; -@@ -1250,10 +1247,9 @@ void smbios_get_tables(MachineState *ms, - smbios_build_type_2_table(); - smbios_build_type_3_table(); - -- smbios_smp_sockets = ms->smp.sockets; -- assert(smbios_smp_sockets >= 1); -+ assert(ms->smp.sockets >= 1); - -- for (i = 0; i < smbios_smp_sockets; i++) { -+ for (i = 0; i < ms->smp.sockets; i++) { - smbios_build_type_4_table(ms, i); - } - -@@ -1299,7 +1295,7 @@ void smbios_get_tables(MachineState *ms, - smbios_build_type_41_table(errp); - smbios_build_type_127_table(); - -- smbios_validate_table(ms); -+ smbios_validate_table(ms->smp.sockets); - smbios_entry_point_setup(); - smbios_immutable = true; - } -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index f8dd07fe4c..95ec64ce2c 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - const char *stream_product, - const char *stream_version, - SmbiosEntryPointType ep_type); --uint8_t 
*smbios_get_table_legacy(MachineState *ms, size_t *length); -+uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-handle-errors-consistently.patch b/SOURCES/kvm-smbios-handle-errors-consistently.patch deleted file mode 100644 index ac75c76..0000000 --- a/SOURCES/kvm-smbios-handle-errors-consistently.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 7271c4424c6d90f0bb34f8090eb4e192eb2b2537 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 20 Feb 2024 10:30:28 +0100 -Subject: [PATCH 14/20] smbios: handle errors consistently - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [12/18] d8a5a70602ae0665ce35e5bf87b2d8420f9189bc - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - Current code uses mix of error_report()+exit(1) - and error_setg() to handle errors. - Use newer error_setg() everywhere, beside consistency - it will allow to detect error condition without killing - QEMU and attempt switch-over to SMBIOS3.x tables/entrypoint - in follow up patch. - - while at it, clear smbios_tables pointer after freeing. - that will avoid double free if smbios_get_tables() is called - multiple times. 
- - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - -Conflicts: include/hw/firmware/smbios.h - due to downstream specific smbios_set_defaults() - -Signed-off-by: Igor Mammedov ---- - hw/i386/fw_cfg.c | 3 ++- - hw/smbios/smbios.c | 34 ++++++++++++++++++++++------------ - hw/smbios/smbios_legacy.c | 22 ++++++++++++++-------- - include/hw/firmware/smbios.h | 4 ++-- - 4 files changed, 40 insertions(+), 23 deletions(-) - -diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c -index a25793a68f..bdc3cc4556 100644 ---- a/hw/i386/fw_cfg.c -+++ b/hw/i386/fw_cfg.c -@@ -73,7 +73,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - - if (pcmc->smbios_legacy_mode) { -- smbios_tables = smbios_get_table_legacy(&smbios_tables_len); -+ smbios_tables = smbios_get_table_legacy(&smbios_tables_len, -+ &error_fatal); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); - return; -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index e40204550e..b5745c6c2d 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -19,7 +19,6 @@ - #include "qemu/units.h" - #include "qapi/error.h" - #include "qemu/config-file.h" --#include "qemu/error-report.h" - #include "qemu/module.h" - #include "qemu/option.h" - #include "sysemu/sysemu.h" -@@ -497,23 +496,25 @@ opts_init(smbios_register_config); - */ - #define SMBIOS_21_MAX_TABLES_LEN 0xffff - --static void smbios_check_type4_count(uint32_t expected_t4_count) -+static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) - { - if (smbios_type4_count && smbios_type4_count != expected_t4_count) { -- error_report("Expected %d SMBIOS Type 4 tables, got %d instead", -- expected_t4_count, smbios_type4_count); -- exit(1); -+ error_setg(errp, "Expected %d SMBIOS Type 4 tables, got %d instead", -+ expected_t4_count, smbios_type4_count); -+ return false; - } -+ return true; - } - --void 
smbios_validate_table(void) -+bool smbios_validate_table(Error **errp) - { - if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && - smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { -- error_report("SMBIOS 2.1 table length %zu exceeds %d", -- smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); -- exit(1); -+ error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", -+ smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); -+ return false; - } -+ return true; - } - - bool smbios_skip_table(uint8_t type, bool required_table) -@@ -1167,15 +1168,18 @@ void smbios_get_tables(MachineState *ms, - smbios_build_type_41_table(errp); - smbios_build_type_127_table(); - -- smbios_check_type4_count(ms->smp.sockets); -- smbios_validate_table(); -+ if (!smbios_check_type4_count(ms->smp.sockets, errp)) { -+ goto err_exit; -+ } -+ if (!smbios_validate_table(errp)) { -+ goto err_exit; -+ } - smbios_entry_point_setup(); - - /* return tables blob and entry point (anchor), and their sizes */ - *tables = smbios_tables; - *tables_len = smbios_tables_len; - *anchor = (uint8_t *)&ep; -- - /* calculate length based on anchor string */ - if (!strncmp((char *)&ep, "_SM_", 4)) { - *anchor_len = sizeof(struct smbios_21_entry_point); -@@ -1184,6 +1188,12 @@ void smbios_get_tables(MachineState *ms, - } else { - abort(); - } -+ -+ return; -+err_exit: -+ g_free(smbios_tables); -+ smbios_tables = NULL; -+ return; - } - - static void save_opt(const char **dest, QemuOpts *opts, const char *name) -diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c -index 21f143e738..a6544bf55a 100644 ---- a/hw/smbios/smbios_legacy.c -+++ b/hw/smbios/smbios_legacy.c -@@ -19,7 +19,7 @@ - #include "qemu/bswap.h" - #include "hw/firmware/smbios.h" - #include "sysemu/sysemu.h" --#include "qemu/error-report.h" -+#include "qapi/error.h" - - struct smbios_header { - uint16_t length; -@@ -128,7 +128,7 @@ static void smbios_build_type_1_fields(void) - } - } - --uint8_t *smbios_get_table_legacy(size_t *length) -+uint8_t 
*smbios_get_table_legacy(size_t *length, Error **errp) - { - int i; - size_t usr_offset; -@@ -136,15 +136,15 @@ uint8_t *smbios_get_table_legacy(size_t *length) - /* complain if fields were given for types > 1 */ - if (find_next_bit(smbios_have_fields_bitmap, - SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { -- error_report("can't process fields for smbios " -+ error_setg(errp, "can't process fields for smbios " - "types > 1 on machine versions < 2.1!"); -- exit(1); -+ goto err_exit; - } - - if (test_bit(4, smbios_have_binfile_bitmap)) { -- error_report("can't process table for smbios " -- "type 4 on machine versions < 2.1!"); -- exit(1); -+ error_setg(errp, "can't process table for smbios " -+ "type 4 on machine versions < 2.1!"); -+ goto err_exit; - } - - g_free(smbios_entries); -@@ -173,7 +173,13 @@ uint8_t *smbios_get_table_legacy(size_t *length) - - smbios_build_type_0_fields(); - smbios_build_type_1_fields(); -- smbios_validate_table(); -+ if (!smbios_validate_table(errp)) { -+ goto err_exit; -+ } -+ - *length = smbios_entries_len; - return smbios_entries; -+err_exit: -+ g_free(smbios_entries); -+ return NULL; - } -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 92e9aba415..44af3a0d82 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -326,7 +326,7 @@ struct smbios_type_127 { - struct smbios_structure_header header; - } QEMU_PACKED; - --void smbios_validate_table(void); -+bool smbios_validate_table(Error **errp); - void smbios_add_usr_blob_size(size_t size); - void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, uint32_t features); -@@ -336,7 +336,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - SmbiosEntryPointType ep_type, - const char *stream_product, - const char *stream_version); --uint8_t *smbios_get_table_legacy(size_t *length); -+uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); - void 
smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch b/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch deleted file mode 100644 index fd1bc19..0000000 --- a/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 38220bc61bdb1614f34a53481f7604720c9e9e5a Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 20 Feb 2024 10:41:06 +0100 -Subject: [PATCH 18/20] smbios: in case of entry point is 'auto' try to build - v2 tables 1st - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [16/18] becbcb3d8dad4842e5939bb75e21f4e737a4a325 - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - -QEMU for some time now uses SMBIOS 3.0 for PC/Q35 machines by -default, however Windows has a bug in locating SMBIOS 3.0 -entrypoint and fails to find tables when booted on SeaBIOS -(on UEFI SMBIOS 3.0 tables work fine since firmware hands -over tables in another way) - -Missing SMBIOS tables may lead to some issues for guest -though (worst are: possible reactiveation, inability to -get virtio drivers from 'Windows Update') - -It's unclear at this point if MS will fix the issue on their -side. So instead of it (or rather in addition) this patch -will try to workaround the issue. - -aka, use smbios-entry-point-type=auto to make QEMU try -generating conservative SMBIOS 2.0 tables and if that -fails (due to limits/requested configuration) fallback -to SMBIOS 3.0 tables. - -With this in place majority of users will use SMBIOS 2.0 -tables which work fine with (Windows + legacy BIOS). 
-The configurations that is not to possible to describe -with SMBIOS 2.0 will switch automatically to SMBIOS 3.0 -(which will trigger Windows bug but there is nothing -QEMU can do here, so go and aks Microsoft to real fix). - -Signed-off-by: Igor Mammedov -Reviewed-by: Ani Sinha -Tested-by: Fiona Ebner ---- - hw/smbios/smbios.c | 52 +++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 49 insertions(+), 3 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 4521ea386c..3d9dcb0d31 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -1097,7 +1097,7 @@ static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) - } - } - --void smbios_get_tables(MachineState *ms, -+static bool smbios_get_tables_ep(MachineState *ms, - SmbiosEntryPointType ep_type, - const struct smbios_phys_mem_area *mem_array, - const unsigned int mem_array_size, -@@ -1106,6 +1106,7 @@ void smbios_get_tables(MachineState *ms, - Error **errp) - { - unsigned i, dimm_cnt, offset; -+ ERRP_GUARD(); - - assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || - ep_type == SMBIOS_ENTRY_POINT_TYPE_64); -@@ -1192,11 +1193,56 @@ void smbios_get_tables(MachineState *ms, - abort(); - } - -- return; -+ return true; - err_exit: - g_free(smbios_tables); - smbios_tables = NULL; -- return; -+ return false; -+} -+ -+void smbios_get_tables(MachineState *ms, -+ SmbiosEntryPointType ep_type, -+ const struct smbios_phys_mem_area *mem_array, -+ const unsigned int mem_array_size, -+ uint8_t **tables, size_t *tables_len, -+ uint8_t **anchor, size_t *anchor_len, -+ Error **errp) -+{ -+ Error *local_err = NULL; -+ bool is_valid; -+ ERRP_GUARD(); -+ -+ switch (ep_type) { -+ case SMBIOS_ENTRY_POINT_TYPE_AUTO: -+ case SMBIOS_ENTRY_POINT_TYPE_32: -+ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_32, -+ mem_array, mem_array_size, -+ tables, tables_len, -+ anchor, anchor_len, -+ &local_err); -+ if (is_valid || ep_type != SMBIOS_ENTRY_POINT_TYPE_AUTO) { -+ break; -+ } -+ /* -+ * 
fall through in case AUTO endpoint is selected and -+ * SMBIOS 2.x tables can't be generated, to try if SMBIOS 3.x -+ * tables would work -+ */ -+ case SMBIOS_ENTRY_POINT_TYPE_64: -+ error_free(local_err); -+ local_err = NULL; -+ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_64, -+ mem_array, mem_array_size, -+ tables, tables_len, -+ anchor, anchor_len, -+ &local_err); -+ break; -+ default: -+ abort(); -+ } -+ if (!is_valid) { -+ error_propagate(errp, local_err); -+ } - } - - static void save_opt(const char **dest, QemuOpts *opts, const char *name) --- -2.39.3 - diff --git a/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch b/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch deleted file mode 100644 index cba6134..0000000 --- a/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 36b0256e27f9d5268c5413891b4a7322819ae9db Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 4 Mar 2024 15:56:19 +0100 -Subject: [PATCH 12/20] smbios: rename/expose structures/bitmaps used by both - legacy and modern code - -RH-Author: Igor Mammedov -RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS -RH-Jira: RHEL-21705 -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [10/18] 59fe438d5b7f6e584a6bb02597e4d4724fe2cece - -JIRA: https://issues.redhat.com/browse/RHEL-21705 - - As a preparation to move legacy handling into a separate file, - add prefix 'smbios_' to type0/type1/have_binfile_bitmap/have_fields_bitmap - and expose them in smbios.h so that they can be reused in - legacy and modern code. - - Doing it as a separate patch to avoid rename cluttering follow-up - patch which will move legacy code into a separate file. 
- - Signed-off-by: Igor Mammedov - Reviewed-by: Ani Sinha - -Conflicts: hw/smbios/smbios.c - due to setting downstream type1.family/type1.sku defaults - -Signed-off-by: Igor Mammedov ---- - hw/smbios/smbios.c | 117 ++++++++++++++++------------------- - include/hw/firmware/smbios.h | 16 +++++ - 2 files changed, 71 insertions(+), 62 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index c48a290478..eb9927335d 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -81,19 +81,11 @@ static int smbios_type4_count = 0; - static bool smbios_have_defaults; - static uint32_t smbios_cpuid_version, smbios_cpuid_features; - --static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); --static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); -+DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); -+DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); - --static struct { -- const char *vendor, *version, *date; -- bool have_major_minor, uefi; -- uint8_t major, minor; --} type0; -- --static struct { -- const char *manufacturer, *product, *version, *serial, *sku, *family; -- /* uuid is in qemu_uuid */ --} type1; -+smbios_type0_t smbios_type0; -+smbios_type1_t smbios_type1; - - static struct { - const char *manufacturer, *product, *version, *serial, *asset, *location; -@@ -584,36 +576,36 @@ static void smbios_maybe_add_str(int type, int offset, const char *data) - static void smbios_build_type_0_fields(void) - { - smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), -- type0.vendor); -+ smbios_type0.vendor); - smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), -- type0.version); -+ smbios_type0.version); - smbios_maybe_add_str(0, offsetof(struct smbios_type_0, - bios_release_date_str), -- type0.date); -- if (type0.have_major_minor) { -+ smbios_type0.date); -+ if (smbios_type0.have_major_minor) { - smbios_add_field(0, offsetof(struct smbios_type_0, - system_bios_major_release), -- 
&type0.major, 1); -+ &smbios_type0.major, 1); - smbios_add_field(0, offsetof(struct smbios_type_0, - system_bios_minor_release), -- &type0.minor, 1); -+ &smbios_type0.minor, 1); - } - } - - static void smbios_build_type_1_fields(void) - { - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), -- type1.manufacturer); -+ smbios_type1.manufacturer); - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), -- type1.product); -+ smbios_type1.product); - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), -- type1.version); -+ smbios_type1.version); - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), -- type1.serial); -+ smbios_type1.serial); - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), -- type1.sku); -+ smbios_type1.sku); - smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), -- type1.family); -+ smbios_type1.family); - if (qemu_uuid_set) { - /* We don't encode the UUID in the "wire format" here because this - * function is for legacy mode and needs to keep the guest ABI, and -@@ -631,14 +623,14 @@ uint8_t *smbios_get_table_legacy(size_t *length) - size_t usr_offset; - - /* also complain if fields were given for types > 1 */ -- if (find_next_bit(have_fields_bitmap, -+ if (find_next_bit(smbios_have_fields_bitmap, - SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { - error_report("can't process fields for smbios " - "types > 1 on machine versions < 2.1!"); - exit(1); - } - -- if (test_bit(4, have_binfile_bitmap)) { -+ if (test_bit(4, smbios_have_binfile_bitmap)) { - error_report("can't process table for smbios " - "type 4 on machine versions < 2.1!"); - exit(1); -@@ -679,10 +671,10 @@ uint8_t *smbios_get_table_legacy(size_t *length) - - bool smbios_skip_table(uint8_t type, bool required_table) - { -- if (test_bit(type, have_binfile_bitmap)) { -+ if (test_bit(type, smbios_have_binfile_bitmap)) { - return true; /* user provided their own binary 
blob(s) */ - } -- if (test_bit(type, have_fields_bitmap)) { -+ if (test_bit(type, smbios_have_fields_bitmap)) { - return false; /* user provided fields via command line */ - } - if (smbios_have_defaults && required_table) { -@@ -710,25 +702,25 @@ static void smbios_build_type_0_table(void) - { - SMBIOS_BUILD_TABLE_PRE(0, T0_BASE, false); /* optional, leave up to BIOS */ - -- SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor); -- SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version); -+ SMBIOS_TABLE_SET_STR(0, vendor_str, smbios_type0.vendor); -+ SMBIOS_TABLE_SET_STR(0, bios_version_str, smbios_type0.version); - - t->bios_starting_address_segment = cpu_to_le16(0xE800); /* from SeaBIOS */ - -- SMBIOS_TABLE_SET_STR(0, bios_release_date_str, type0.date); -+ SMBIOS_TABLE_SET_STR(0, bios_release_date_str, smbios_type0.date); - - t->bios_rom_size = 0; /* hardcoded in SeaBIOS with FIXME comment */ - - t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */ - t->bios_characteristics_extension_bytes[0] = 0; - t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */ -- if (type0.uefi) { -+ if (smbios_type0.uefi) { - t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */ - } - -- if (type0.have_major_minor) { -- t->system_bios_major_release = type0.major; -- t->system_bios_minor_release = type0.minor; -+ if (smbios_type0.have_major_minor) { -+ t->system_bios_major_release = smbios_type0.major; -+ t->system_bios_minor_release = smbios_type0.minor; - } else { - t->system_bios_major_release = 0; - t->system_bios_minor_release = 0; -@@ -758,18 +750,18 @@ static void smbios_build_type_1_table(void) - { - SMBIOS_BUILD_TABLE_PRE(1, T1_BASE, true); /* required */ - -- SMBIOS_TABLE_SET_STR(1, manufacturer_str, type1.manufacturer); -- SMBIOS_TABLE_SET_STR(1, product_name_str, type1.product); -- SMBIOS_TABLE_SET_STR(1, version_str, type1.version); -- SMBIOS_TABLE_SET_STR(1, serial_number_str, type1.serial); -+ SMBIOS_TABLE_SET_STR(1, manufacturer_str, 
smbios_type1.manufacturer); -+ SMBIOS_TABLE_SET_STR(1, product_name_str, smbios_type1.product); -+ SMBIOS_TABLE_SET_STR(1, version_str, smbios_type1.version); -+ SMBIOS_TABLE_SET_STR(1, serial_number_str, smbios_type1.serial); - if (qemu_uuid_set) { - smbios_encode_uuid(&t->uuid, &qemu_uuid); - } else { - memset(&t->uuid, 0, 16); - } - t->wake_up_type = 0x06; /* power switch */ -- SMBIOS_TABLE_SET_STR(1, sku_number_str, type1.sku); -- SMBIOS_TABLE_SET_STR(1, family_str, type1.family); -+ SMBIOS_TABLE_SET_STR(1, sku_number_str, smbios_type1.sku); -+ SMBIOS_TABLE_SET_STR(1, family_str, smbios_type1.family); - - SMBIOS_BUILD_TABLE_POST; - } -@@ -1184,12 +1176,12 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - * - * We get 'System Manufacturer' and 'Baseboard Manufacturer' - */ -- SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); -- SMBIOS_SET_DEFAULT(type1.product, product); -- SMBIOS_SET_DEFAULT(type1.version, version); -- SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); -+ SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); -+ SMBIOS_SET_DEFAULT(smbios_type1.product, product); -+ SMBIOS_SET_DEFAULT(smbios_type1.version, version); -+ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); - if (stream_version != NULL) { -- SMBIOS_SET_DEFAULT(type1.sku, stream_version); -+ SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); - } - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - if (stream_product != NULL) { -@@ -1468,13 +1460,13 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - } - - if (header->type <= SMBIOS_MAX_TYPE) { -- if (test_bit(header->type, have_fields_bitmap)) { -+ if (test_bit(header->type, smbios_have_fields_bitmap)) { - error_setg(errp, - "can't load type %d struct, fields already specified!", - header->type); - return; - } -- set_bit(header->type, have_binfile_bitmap); -+ set_bit(header->type, smbios_have_binfile_bitmap); - } - - if (header->type == 4) { -@@ -1505,41 
+1497,42 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) - return; - } - -- if (test_bit(type, have_binfile_bitmap)) { -+ if (test_bit(type, smbios_have_binfile_bitmap)) { - error_setg(errp, "can't add fields, binary file already loaded!"); - return; - } -- set_bit(type, have_fields_bitmap); -+ set_bit(type, smbios_have_fields_bitmap); - - switch (type) { - case 0: - if (!qemu_opts_validate(opts, qemu_smbios_type0_opts, errp)) { - return; - } -- save_opt(&type0.vendor, opts, "vendor"); -- save_opt(&type0.version, opts, "version"); -- save_opt(&type0.date, opts, "date"); -- type0.uefi = qemu_opt_get_bool(opts, "uefi", false); -+ save_opt(&smbios_type0.vendor, opts, "vendor"); -+ save_opt(&smbios_type0.version, opts, "version"); -+ save_opt(&smbios_type0.date, opts, "date"); -+ smbios_type0.uefi = qemu_opt_get_bool(opts, "uefi", false); - - val = qemu_opt_get(opts, "release"); - if (val) { -- if (sscanf(val, "%hhu.%hhu", &type0.major, &type0.minor) != 2) { -+ if (sscanf(val, "%hhu.%hhu", &smbios_type0.major, -+ &smbios_type0.minor) != 2) { - error_setg(errp, "Invalid release"); - return; - } -- type0.have_major_minor = true; -+ smbios_type0.have_major_minor = true; - } - return; - case 1: - if (!qemu_opts_validate(opts, qemu_smbios_type1_opts, errp)) { - return; - } -- save_opt(&type1.manufacturer, opts, "manufacturer"); -- save_opt(&type1.product, opts, "product"); -- save_opt(&type1.version, opts, "version"); -- save_opt(&type1.serial, opts, "serial"); -- save_opt(&type1.sku, opts, "sku"); -- save_opt(&type1.family, opts, "family"); -+ save_opt(&smbios_type1.manufacturer, opts, "manufacturer"); -+ save_opt(&smbios_type1.product, opts, "product"); -+ save_opt(&smbios_type1.version, opts, "version"); -+ save_opt(&smbios_type1.serial, opts, "serial"); -+ save_opt(&smbios_type1.sku, opts, "sku"); -+ save_opt(&smbios_type1.family, opts, "family"); - - val = qemu_opt_get(opts, "uuid"); - if (val) { -diff --git a/include/hw/firmware/smbios.h 
b/include/hw/firmware/smbios.h -index d55018e5e3..333de0d5fc 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -2,6 +2,7 @@ - #define QEMU_SMBIOS_H - - #include "qapi/qapi-types-machine.h" -+#include "qemu/bitmap.h" - - /* - * SMBIOS Support -@@ -16,8 +17,23 @@ - * - */ - -+typedef struct { -+ const char *vendor, *version, *date; -+ bool have_major_minor, uefi; -+ uint8_t major, minor; -+} smbios_type0_t; -+extern smbios_type0_t smbios_type0; -+ -+typedef struct { -+ const char *manufacturer, *product, *version, *serial, *sku, *family; -+ /* uuid is in qemu_uuid */ -+} smbios_type1_t; -+extern smbios_type1_t smbios_type1; - - #define SMBIOS_MAX_TYPE 127 -+extern DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); -+extern DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); -+ - #define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) - --- -2.39.3 - diff --git a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch deleted file mode 100644 index 81ae2f1..0000000 --- a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch +++ /dev/null @@ -1,190 +0,0 @@ -From c5f9e92cd49a2171a5b0223cafd7fab3f45edb82 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 9 Jan 2024 19:17:17 +0100 -Subject: [PATCH 06/22] string-output-visitor: Fix (pseudo) struct handling - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [2/17] 84e226f161680dd61b6635e213203d062c1aa556 (stefanha/centos-stream-qemu-kvm) - -Commit ff32bb53 tried to get minimal struct support into the string -output visitor by just making it return "". Unfortunately, it -forgot that the caller will still make more visitor calls for the -content of the struct. 
- -If the struct is contained in a list, such as IOThreadVirtQueueMapping, -in the better case its fields show up as separate list entries. In the -worse case, it contains another list, and the string output visitor -doesn't support nested lists and asserts that this doesn't happen. So as -soon as the optional "vqs" field in IOThreadVirtQueueMapping is -specified, we get a crash. - -This can be reproduced with the following command line: - - echo "info qtree" | ./qemu-system-x86_64 \ - -object iothread,id=t0 \ - -blockdev null-co,node-name=disk \ - -device '{"driver": "virtio-blk-pci", "drive": "disk", - "iothread-vq-mapping": [{"iothread": "t0", "vqs": [0]}]}' \ - -monitor stdio - -Fix the problem by counting the nesting level of structs and ignoring -any visitor calls for values (apart from start/end_struct) while we're -not on the top level. - -Lists nested directly within lists remain unimplemented, as we don't -currently have a use case for them. - -Fixes: ff32bb53476539d352653f4ed56372dced73a388 -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2069 -Reported-by: Aihua Liang -Signed-off-by: Kevin Wolf -Message-ID: <20240109181717.42493-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Kevin Wolf -(cherry picked from commit 014b99a8e41c8cd1e895137654b44dec5430122c) -Signed-off-by: Stefan Hajnoczi ---- - qapi/string-output-visitor.c | 46 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 46 insertions(+) - -diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c -index f0c1dea89e..5115536b15 100644 ---- a/qapi/string-output-visitor.c -+++ b/qapi/string-output-visitor.c -@@ -65,6 +65,7 @@ struct StringOutputVisitor - } range_start, range_end; - GList *ranges; - void *list; /* Only needed for sanity checking the caller */ -+ unsigned int struct_nesting; - }; - - static StringOutputVisitor *to_sov(Visitor *v) -@@ -144,6 +145,10 @@ static bool print_type_int64(Visitor *v, const char *name, int64_t *obj, - 
StringOutputVisitor *sov = to_sov(v); - GList *l; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - switch (sov->list_mode) { - case LM_NONE: - string_output_append(sov, *obj); -@@ -231,6 +236,10 @@ static bool print_type_size(Visitor *v, const char *name, uint64_t *obj, - uint64_t val; - char *out, *psize; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (!sov->human) { - out = g_strdup_printf("%"PRIu64, *obj); - string_output_set(sov, out); -@@ -250,6 +259,11 @@ static bool print_type_bool(Visitor *v, const char *name, bool *obj, - Error **errp) - { - StringOutputVisitor *sov = to_sov(v); -+ -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - string_output_set(sov, g_strdup(*obj ? "true" : "false")); - return true; - } -@@ -260,6 +274,10 @@ static bool print_type_str(Visitor *v, const char *name, char **obj, - StringOutputVisitor *sov = to_sov(v); - char *out; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (sov->human) { - out = *obj ? g_strdup_printf("\"%s\"", *obj) : g_strdup(""); - } else { -@@ -273,6 +291,11 @@ static bool print_type_number(Visitor *v, const char *name, double *obj, - Error **errp) - { - StringOutputVisitor *sov = to_sov(v); -+ -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - string_output_set(sov, g_strdup_printf("%.17g", *obj)); - return true; - } -@@ -283,6 +306,10 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - StringOutputVisitor *sov = to_sov(v); - char *out; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (sov->human) { - out = g_strdup(""); - } else { -@@ -295,6 +322,9 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - static bool start_struct(Visitor *v, const char *name, void **obj, - size_t size, Error **errp) - { -+ StringOutputVisitor *sov = to_sov(v); -+ -+ sov->struct_nesting++; - return true; - } - -@@ -302,6 +332,10 @@ static void end_struct(Visitor *v, void **obj) - { - StringOutputVisitor *sov = to_sov(v); - -+ if 
(--sov->struct_nesting) { -+ return; -+ } -+ - /* TODO actually print struct fields */ - string_output_set(sov, g_strdup("")); - } -@@ -312,6 +346,10 @@ start_list(Visitor *v, const char *name, GenericList **list, size_t size, - { - StringOutputVisitor *sov = to_sov(v); - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - /* we can't traverse a list in a list */ - assert(sov->list_mode == LM_NONE); - /* We don't support visits without a list */ -@@ -329,6 +367,10 @@ static GenericList *next_list(Visitor *v, GenericList *tail, size_t size) - StringOutputVisitor *sov = to_sov(v); - GenericList *ret = tail->next; - -+ if (sov->struct_nesting) { -+ return ret; -+ } -+ - if (ret && !ret->next) { - sov->list_mode = LM_END; - } -@@ -339,6 +381,10 @@ static void end_list(Visitor *v, void **obj) - { - StringOutputVisitor *sov = to_sov(v); - -+ if (sov->struct_nesting) { -+ return; -+ } -+ - assert(sov->list == obj); - assert(sov->list_mode == LM_STARTED || - sov->list_mode == LM_END || --- -2.39.3 - diff --git a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch deleted file mode 100644 index f83635d..0000000 --- a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch +++ /dev/null @@ -1,90 +0,0 @@ -From fb2069be402ec1322834c555714f0e993778cc9d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 12 Dec 2023 08:49:34 -0500 -Subject: [PATCH 05/22] string-output-visitor: show structs as "" - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/17] 0c08e8237d28fbdbdbc7576d4c17d2eeeb413c2a (stefanha/centos-stream-qemu-kvm) - -StringOutputVisitor crashes when it visits a struct because -->start_struct() is NULL. - -Show "" instead of crashing. 
This is necessary because the -virtio-blk-pci iothread-vq-mapping parameter that I'd like to introduce -soon is a list of IOThreadMapping structs. - -This patch is a quick fix to solve the crash, but the long-term solution -is replacing StringOutputVisitor with something that can handle the full -gamut of values in QEMU. - -Cc: Markus Armbruster -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231212134934.500289-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Markus Armbruster -Signed-off-by: Kevin Wolf -(cherry picked from commit ff32bb53476539d352653f4ed56372dced73a388) -Signed-off-by: Stefan Hajnoczi ---- - include/qapi/string-output-visitor.h | 6 +++--- - qapi/string-output-visitor.c | 16 ++++++++++++++++ - 2 files changed, 19 insertions(+), 3 deletions(-) - -diff --git a/include/qapi/string-output-visitor.h b/include/qapi/string-output-visitor.h -index 268dfe9986..b1ee473b30 100644 ---- a/include/qapi/string-output-visitor.h -+++ b/include/qapi/string-output-visitor.h -@@ -26,9 +26,9 @@ typedef struct StringOutputVisitor StringOutputVisitor; - * If everything else succeeds, pass @result to visit_complete() to - * collect the result of the visit. - * -- * The string output visitor does not implement support for visiting -- * QAPI structs, alternates, null, or arbitrary QTypes. It also -- * requires a non-null list argument to visit_start_list(). -+ * The string output visitor does not implement support for alternates, null, -+ * or arbitrary QTypes. Struct fields are not shown. It also requires a -+ * non-null list argument to visit_start_list(). 
- */ - Visitor *string_output_visitor_new(bool human, char **result); - -diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c -index c0cb72dbe4..f0c1dea89e 100644 ---- a/qapi/string-output-visitor.c -+++ b/qapi/string-output-visitor.c -@@ -292,6 +292,20 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - return true; - } - -+static bool start_struct(Visitor *v, const char *name, void **obj, -+ size_t size, Error **errp) -+{ -+ return true; -+} -+ -+static void end_struct(Visitor *v, void **obj) -+{ -+ StringOutputVisitor *sov = to_sov(v); -+ -+ /* TODO actually print struct fields */ -+ string_output_set(sov, g_strdup("")); -+} -+ - static bool - start_list(Visitor *v, const char *name, GenericList **list, size_t size, - Error **errp) -@@ -379,6 +393,8 @@ Visitor *string_output_visitor_new(bool human, char **result) - v->visitor.type_str = print_type_str; - v->visitor.type_number = print_type_number; - v->visitor.type_null = print_type_null; -+ v->visitor.start_struct = start_struct; -+ v->visitor.end_struct = end_struct; - v->visitor.start_list = start_list; - v->visitor.next_list = next_list; - v->visitor.end_list = end_list; --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000..c72f290 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,215 @@ +From d9595fecd03c9a69ac562e3f240d50b2fa8d14a4 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 Mar 2024 10:10:44 +0800 +Subject: [PATCH 006/100] target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [6/91] 
4bc71f82c258db46569a7e08965d1d358b19416c (bonzini/rhel-qemu-kvm) + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of GraniteRapids CPU model except above features to +SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. 
When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6e82d3b6220777667968a04c87e1667f164ebe88) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0aa88d9b48..efbadc3ed7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4127,6 +4127,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. 
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ 
.features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | 
VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000..74de391 --- /dev/null +++ b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,50 @@ +From ae6229a3e45318b1101291b99a0e894399dcb1db Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 +Subject: [PATCH 007/100] target/i386: 
Export RFDS bit to guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [7/91] 7eb6cae8821a2e953d3ff2033fa2e973011ad771 (bonzini/rhel-qemu-kvm) + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41bdd9812863c150284a9339a048ed88c40f4df7) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index efbadc3ed7..489c853b42 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch new file mode 100644 index 0000000..f37dbc2 --- /dev/null +++ b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch @@ -0,0 +1,192 @@ +From 4a811f54cdb3c9329f193ea43c76ed4eb1b14c19 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 
Mar 2024 15:29:33 +0100 +Subject: [PATCH 022/100] target/i386: Implement mc->kvm_type() to get VM type + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [22/91] d58cf6ead2de37852adc15c7642166904403453f (bonzini/rhel-qemu-kvm) + +KVM is introducing a new API to create confidential guests, which +will be used by TDX and SEV-SNP but is also available for SEV and +SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to +identify which confidential computing technology to use. + +Since there are no other expected uses of VM types, delegate +mc->kvm_type() for x86 boards to the confidential-guest-support +object pointed to by ms->cgs. + +For example, if a sev-guest object is specified to confidential-guest-support, +like, + + qemu -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,... + +it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +is supported, and if so use them together with the KVM_SEV_INIT2 +function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to +KVM_SEV_INIT and KVM_SEV_ES_INIT. + +This is a preparatory work towards TDX and SEV-SNP support, but it +will also enable support for VMSA features such as DebugSwap, which +are only available via KVM_SEV_INIT2. 
+ +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 11 ++++++++ + target/i386/confidential-guest.h | 19 ++++++++++++++ + target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 2 ++ + 4 files changed, 76 insertions(+) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 84a4801977..3d5b51e92d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + qapi_free_SgxEPCList(list); + } + ++static int x86_kvm_type(MachineState *ms, const char *vm_type) ++{ ++ /* ++ * No x86 machine has a kvm-type property. If one is added that has ++ * it, it should call kvm_get_vm_type() directly or not use it at all. ++ */ ++ assert(vm_type == NULL); ++ return kvm_enabled() ? kvm_get_vm_type(ms) : 0; ++} ++ + static void x86_machine_initfn(Object *obj) + { + X86MachineState *x86ms = X86_MACHINE(obj); +@@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; ++ mc->kvm_type = x86_kvm_type; + x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; + nc->nmi_monitor_handler = x86_nmi; +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +index ca12d5a8fb..532e172a60 100644 +--- a/target/i386/confidential-guest.h ++++ b/target/i386/confidential-guest.h +@@ -36,5 +36,24 @@ struct X86ConfidentialGuest { + struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; ++ ++ /* */ ++ int (*kvm_type)(X86ConfidentialGuest *cg); + }; ++ ++/** ++ * x86_confidential_guest_kvm_type: ++ * ++ * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. 
++ */ ++static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) ++{ ++ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); ++ ++ if (klass->kvm_type) { ++ return klass->kvm_type(cg); ++ } else { ++ return 0; ++ } ++} + #endif +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a12207a8ee..1f0ab12c2e 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "kvm_i386.h" ++#include "../confidential-guest.h" + #include "sev.h" + #include "xen-emu.h" + #include "hyperv.h" +@@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + static RateLimit bus_lock_ratelimit_ctrl; + static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + ++static const char *vm_type_name[] = { ++ [KVM_X86_DEFAULT_VM] = "default", ++}; ++ ++bool kvm_is_vm_type_supported(int type) ++{ ++ uint32_t machine_types; ++ ++ /* ++ * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM ++ * is always supported ++ */ ++ if (type == KVM_X86_DEFAULT_VM) { ++ return true; ++ } ++ ++ machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), ++ KVM_CAP_VM_TYPES); ++ return !!(machine_types & BIT(type)); ++} ++ ++int kvm_get_vm_type(MachineState *ms) ++{ ++ int kvm_type = KVM_X86_DEFAULT_VM; ++ ++ if (ms->cgs) { ++ if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { ++ error_report("configuration type %s not supported for x86 guests", ++ object_get_typename(OBJECT(ms->cgs))); ++ exit(1); ++ } ++ kvm_type = x86_confidential_guest_kvm_type( ++ X86_CONFIDENTIAL_GUEST(ms->cgs)); ++ } ++ ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); ++ exit(1); ++ } ++ ++ return kvm_type; ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +diff --git 
a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 30fedcffea..6b44844d95 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + ++int kvm_get_vm_type(MachineState *ms); + void kvm_arch_reset_vcpu(X86CPU *cs); + void kvm_arch_after_reset_vcpu(X86CPU *cpu); + void kvm_arch_do_init_vcpu(X86CPU *cs); +@@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #ifdef CONFIG_KVM + ++bool kvm_is_vm_type_supported(int type); + bool kvm_has_adjust_clock_stable(void); + bool kvm_has_exception_payload(void); + void kvm_synchronize_all_tsc(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000..9844da7 --- /dev/null +++ b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,68 @@ +From fe60f8d47b6e14f17dd6c06b03bd00e6bcdbeefb Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH 005/100] target/i386: Introduce Icelake-Server-v7 to enable + TSX + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [5/91] 66d865899e0d510b6c86763422d6b28b904b208a (bonzini/rhel-qemu-kvm) + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. 
+So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. + +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then guest can use TSX securely when it see taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. + +Ideally, TSX should have being enabled together with taa-no since v3, but for +compatibility, we'd better to add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c895fa54e3060c5ac6f3888dce96c9b78626072b) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a7f71422ea..0aa88d9b48 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3840,6 +3840,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch new file mode 100644 index 0000000..ace0367 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch @@ -0,0 +1,49 @@ +From 070dda07559a7488c62fc80a8c79e8baaee125eb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 10:37:23 +0200 +Subject: [PATCH 087/100] target/i386: SEV: fix formatting of CPUID mismatch + message + +RH-Author: 
Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [87/91] 36bc2cc80d5ffc1ceeb1836540660ff45885a818 (bonzini/rhel-qemu-kvm) + +Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05) +Signed-off-by: Paolo Bonzini +(cherry picked from commit f45ef010e19fe86314bffd5d5c9d5d77f4ce8103) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c40562dce3..37de80adc7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -839,7 +839,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + size_t i; + + if (old->count != new->count) { +- error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch, " + "provided: %d, expected: %d", old->count, new->count); + return; + } +@@ -851,8 +851,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { +- error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" +- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch new file mode 100644 index 0000000..3030d59 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch @@ -0,0 +1,42 @@ +From 
37b7e2185f1d23dd5f5a95b545b8d760492915ed Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 2 Aug 2024 01:43:37 +0200 +Subject: [PATCH 091/100] target/i386: SEV: fix mismatch in vcek-disabled + property name + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [91/91] 3a8abc4a0547b985cb79cef29bd3e8350d3d4b48 (bonzini/rhel-qemu-kvm) + +The vcek-disabled property of the sev-snp-guest object is misspelled +vcek-required (which I suppose would use the opposite polarity) in +the call to object_class_property_add_bool(). Fix it. + +Reported-by: Zixi Chen +Reviewed-by: Pankaj Gupta +Signed-off-by: Paolo Bonzini +(cherry picked from commit d4392415c328f83b2e30517a3561be523874f441) +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b921defb63..aed565dbe8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2378,7 +2378,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); +- object_class_property_add_bool(oc, "vcek-required", ++ object_class_property_add_bool(oc, "vcek-disabled", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch new file mode 100644 index 0000000..7c17e53 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch @@ -0,0 +1,146 @@ +From 6bb738fb90a3a1221ae35596b3d03a17e0b1c34d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:30:25 +0100 +Subject: [PATCH 023/100] target/i386: SEV: use KVM_SEV_INIT2 if possible + 
+RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [23/91] 9579d772ae5124a94c6b1e3a4566bf3470d2bc8f (bonzini/rhel-qemu-kvm) + +Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual +machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. + +These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have +several advantages: + +- sharing the initialization sequence with SEV-SNP and TDX + +- allowing arguments including the set of desired VMSA features + +- protection against invalid use of KVM_GET/SET_* ioctls for guests + with encrypted state + +If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, +fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the +default x86 VM type). + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 663e2f443e5722370708ce2f4c27d94a2087d2d3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 2 ++ + target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++---- + 2 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 1f0ab12c2e..408568d053 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + + static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", ++ [KVM_X86_SEV_VM] = "SEV", ++ [KVM_X86_SEV_ES_VM] = "SEV-ES", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ebe36d4c10..9dab4060b8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "crypto/hash.h" + #include "sysemu/kvm.h" ++#include "kvm/kvm_i386.h" + #include "sev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" +@@ -56,6 +57,8 @@ 
OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + struct SevGuestState { + X86ConfidentialGuest parent_obj; + ++ int kvm_type; ++ + /* configuration parameters */ + char *sev_device; + uint32_t policy; +@@ -850,6 +853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static int sev_kvm_type(X86ConfidentialGuest *cg) ++{ ++ SevGuestState *sev = SEV_GUEST(cg); ++ int kvm_type; ++ ++ if (sev->kvm_type != -1) { ++ goto out; ++ } ++ ++ kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type)) { ++ sev->kvm_type = kvm_type; ++ } else { ++ sev->kvm_type = KVM_X86_DEFAULT_VM; ++ } ++ ++out: ++ return sev->kvm_type; ++} ++ + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev = SEV_GUEST(cgs); +@@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + __func__); + goto err; + } +- cmd = KVM_SEV_ES_INIT; +- } else { +- cmd = KVM_SEV_INIT; + } + + trace_kvm_sev_init(); +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; ++ ++ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ } else { ++ struct kvm_sev_init args = { 0 }; ++ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ } ++ + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -1327,8 +1356,10 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, +@@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj) + { + SevGuestState *sev = SEV_GUEST(obj); + ++ sev->kvm_type = -1; ++ + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch new file mode 100644 index 0000000..cd41279 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch @@ -0,0 +1,124 @@ +From 090c64ea622534ff2ae6c9b66cdf0b1ddb58bf26 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:36 +0100 +Subject: [PATCH 002/100] target/i386: add guest-phys-bits cpu property + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [2/91] 6603e842012dc484e1f571ea0a77b59095f37003 (bonzini/rhel-qemu-kvm) + +Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) +via -cpu $model,guest-phys-bits=$nr. 
+ +Signed-off-by: Gerd Hoffmann +Message-ID: <20240318155336.156197-3-kraxel@redhat.com> +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +(cherry picked from commit 513ba32dccc659c80722b3a43233b26eaa50309a) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 2 ++ + target/i386/cpu.c | 22 ++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 648762d908..b9fde3cec1 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -349,6 +349,8 @@ GlobalProperty pc_rhel_compat[] = { + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + + GlobalProperty pc_rhel_9_5_compat[] = { ++ /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ ++ { TYPE_X86_CPU, "guest-phys-bits", "0" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index be7b0663cd..a7f71422ea 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6591,6 +6591,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64 bit processor */ + *eax |= (cpu_x86_virtual_addr_width(env) << 8); ++ *eax |= (cpu->guest_phys_bits << 16); + } + *ebx = env->features[FEAT_8000_0008_EBX]; + if (cs->nr_cores * cs->nr_threads > 1) { +@@ -7350,6 +7351,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + ++ if (cpu->guest_phys_bits == -1) { ++ /* ++ * If it was not set by the user, or by the accelerator via ++ * cpu_exec_realizefn, clear. ++ */ ++ cpu->guest_phys_bits = 0; ++ } ++ + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. 
Note that this check +@@ -7400,6 +7409,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } ++ if (cpu->guest_phys_bits && ++ (cpu->guest_phys_bits > cpu->phys_bits || ++ cpu->guest_phys_bits < 32)) { ++ error_setg(errp, "guest-phys-bits should be between 32 and %u " ++ " (but is %u)", ++ cpu->phys_bits, cpu->guest_phys_bits); ++ return; ++ } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. +@@ -7408,6 +7425,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } ++ if (cpu->guest_phys_bits != 0) { ++ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); ++ return; ++ } + + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { + cpu->phys_bits = 36; +@@ -7908,6 +7929,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), ++ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6b05738079..6112e27bfd 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -2027,6 +2027,14 @@ struct ArchCPU { + /* Number of physical address bits supported */ + uint32_t phys_bits; + ++ /* ++ * Number of guest physical address bits available. Usually this is ++ * identical to host physical address bits. 
With NPT or EPT 4-level ++ * paging, guest physical address space might be restricted to 48 bits ++ * even if the host cpu supports more physical address bits. ++ */ ++ uint32_t guest_phys_bits; ++ + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch new file mode 100644 index 0000000..dec3220 --- /dev/null +++ b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch @@ -0,0 +1,161 @@ +From 0573fcd1775b6613127b1906d59d02e65f7519f3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 11:07:43 -0400 +Subject: [PATCH 021/100] target/i386: introduce x86-confidential-guest + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [21/91] e86d3bcde7e1c2fa1ba8c9bc83e02033644f1ac0 (bonzini/rhel-qemu-kvm) + +Introduce a common superclass for x86 confidential guest implementations. +It will extend ConfidentialGuestSupportClass with a method that provides +the VM type to be passed to KVM_CREATE_VM. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit d82e9c843d662f13821026618aba936eda31a6c0) +Signed-off-by: Paolo Bonzini +--- + target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ + target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ + target/i386/meson.build | 2 +- + target/i386/sev.c | 6 ++--- + 4 files changed, 77 insertions(+), 4 deletions(-) + create mode 100644 target/i386/confidential-guest.c + create mode 100644 target/i386/confidential-guest.h + +diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c +new file mode 100644 +index 0000000000..b3727845ad +--- /dev/null ++++ b/target/i386/confidential-guest.c +@@ -0,0 +1,33 @@ ++/* ++ * QEMU Confidential Guest support ++ * ++ * Copyright (C) 2024 Red Hat, Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or ++ * later. See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "confidential-guest.h" ++ ++OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, ++ x86_confidential_guest, ++ X86_CONFIDENTIAL_GUEST, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ ++static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void x86_confidential_guest_init(Object *obj) ++{ ++} ++ ++static void x86_confidential_guest_finalize(Object *obj) ++{ ++} +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +new file mode 100644 +index 0000000000..ca12d5a8fb +--- /dev/null ++++ b/target/i386/confidential-guest.h +@@ -0,0 +1,40 @@ ++/* ++ * x86-specific confidential guest methods. ++ * ++ * Copyright (c) 2024 Red Hat Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++#ifndef TARGET_I386_CG_H ++#define TARGET_I386_CG_H ++ ++#include "qom/object.h" ++ ++#include "exec/confidential-guest-support.h" ++ ++#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" ++ ++OBJECT_DECLARE_TYPE(X86ConfidentialGuest, ++ X86ConfidentialGuestClass, ++ X86_CONFIDENTIAL_GUEST) ++ ++struct X86ConfidentialGuest { ++ /* */ ++ ConfidentialGuestSupport parent_obj; ++}; ++ ++/** ++ * X86ConfidentialGuestClass: ++ * ++ * Class to be implemented by confidential-guest-support concrete objects ++ * for the x86 target. ++ */ ++struct X86ConfidentialGuestClass { ++ /* */ ++ ConfidentialGuestSupportClass parent; ++}; ++#endif +diff --git a/target/i386/meson.build b/target/i386/meson.build +index 7c74bfa859..8abce725f8 100644 +--- a/target/i386/meson.build ++++ b/target/i386/meson.build +@@ -6,7 +6,7 @@ i386_ss.add(files( + 'xsave_helper.c', + 'cpu-dump.c', + )) +-i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) ++i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) + + # x86 cpu type + i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c49a8fd55e..ebe36d4c10 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -35,7 +35,7 @@ + #include "monitor/monitor.h" + #include "monitor/hmp-target.h" + #include "qapi/qapi-commands-misc-target.h" +-#include "exec/confidential-guest-support.h" ++#include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +@@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + * -machine ...,memory-encryption=sev0 + */ + struct SevGuestState { +- ConfidentialGuestSupport parent_obj; ++ X86ConfidentialGuest parent_obj; + + /* configuration parameters */ + char *sev_device; +@@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj) + + /* sev guest info */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .parent = 
TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch deleted file mode 100644 index a2d712f..0000000 --- a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch +++ /dev/null @@ -1,205 +0,0 @@ -From cc8d794932e26df7c7f3c8cc0c1f42da8d52f12b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Jan 2024 10:26:52 +0100 -Subject: [PATCH 069/101] target/s390x/kvm/pv: Provide some more useful - information if decryption fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 213: s390x: Provide some more useful information if decryption of a PV image fails -RH-Jira: RHEL-18212 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Commit: [1/1] 4ffb61869f7df33e23d3e0ebf8c29e386e3f6cbc (thuth/qemu-kvm-cs9) - -JIRA: https://issues.redhat.com/browse/RHEL-18212 - -commit 7af51621b16ae86646cc2dc9dee30de8176ff761 -Author: Thomas Huth -Date: Wed Jan 10 15:29:16 2024 +0100 - - target/s390x/kvm/pv: Provide some more useful information if decryption fails - - It's a common scenario to copy guest images from one host to another - to run the guest on the other machine. This (of course) does not work - with "secure execution" guests since they are encrypted with one certain - host key. However, if you still (accidentally) do it, you only get a - very user-unfriendly error message that looks like this: - - qemu-system-s390x: KVM PV command 2 (KVM_PV_SET_SEC_PARMS) failed: - header rc 108 rrc 5 IOCTL rc: -22 - - Let's provide at least a somewhat nicer hint to the users so that they - are able to figure out what might have gone wrong. 
- - Buglink: https://issues.redhat.com/browse/RHEL-18212 - Message-ID: <20240110142916.850605-1-thuth@redhat.com> - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Reviewed-by: Claudio Imbrenda - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - hw/s390x/ipl.c | 5 ++--- - hw/s390x/ipl.h | 2 +- - hw/s390x/s390-virtio-ccw.c | 5 ++++- - target/s390x/kvm/pv.c | 25 ++++++++++++++++++++----- - target/s390x/kvm/pv.h | 5 +++-- - 5 files changed, 30 insertions(+), 12 deletions(-) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index 515dcf51b5..b23a6a0ef3 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -703,7 +703,7 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) - cpu_physical_memory_unmap(addr, len, 1, len); - } - --int s390_ipl_prepare_pv_header(void) -+int s390_ipl_prepare_pv_header(Error **errp) - { - IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); - IPLBlockPV *ipib_pv = &ipib->pv; -@@ -712,8 +712,7 @@ int s390_ipl_prepare_pv_header(void) - - cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, - ipib_pv->pv_header_len); -- rc = s390_pv_set_sec_parms((uintptr_t)hdr, -- ipib_pv->pv_header_len); -+ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ipib_pv->pv_header_len, errp); - g_free(hdr); - return rc; - } -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index 7fc86e7905..57cd125769 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -107,7 +107,7 @@ typedef union IplParameterBlock IplParameterBlock; - - int s390_ipl_set_loadparm(uint8_t *loadparm); - void s390_ipl_update_diag308(IplParameterBlock *iplb); --int s390_ipl_prepare_pv_header(void); -+int s390_ipl_prepare_pv_header(Error **errp); - int s390_ipl_pv_unpack(void); - void s390_ipl_prepare_cpu(S390CPU *cpu); - IplParameterBlock *s390_ipl_get_iplb(void); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 984891b82a..e26ce26f5a 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -391,7 +391,7 @@ static int 
s390_machine_protect(S390CcwMachineState *ms) - } - - /* Set SE header and unpack */ -- rc = s390_ipl_prepare_pv_header(); -+ rc = s390_ipl_prepare_pv_header(&local_err); - if (rc) { - goto out_err; - } -@@ -410,6 +410,9 @@ static int s390_machine_protect(S390CcwMachineState *ms) - return rc; - - out_err: -+ if (local_err) { -+ error_report_err(local_err); -+ } - s390_machine_unprotect(ms); - return rc; - } -diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c -index 6a69be7e5c..7ca7faec73 100644 ---- a/target/s390x/kvm/pv.c -+++ b/target/s390x/kvm/pv.c -@@ -29,7 +29,8 @@ static bool info_valid; - static struct kvm_s390_pv_info_vm info_vm; - static struct kvm_s390_pv_info_dump info_dump; - --static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) -+static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, -+ int *pvrc) - { - struct kvm_pv_cmd pv_cmd = { - .cmd = cmd, -@@ -46,6 +47,9 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) - "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, - rc); - } -+ if (pvrc) { -+ *pvrc = pv_cmd.rc; -+ } - return rc; - } - -@@ -53,12 +57,13 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) - * This macro lets us pass the command as a string to the function so - * we can print it on an error. 
- */ --#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data) -+#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) -+#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc) - #define s390_pv_cmd_exit(cmd, data) \ - { \ - int rc; \ - \ -- rc = __s390_pv_cmd(cmd, #cmd, data);\ -+ rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \ - if (rc) { \ - exit(1); \ - } \ -@@ -142,14 +147,24 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) - return true; - } - --int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) - { -+ int ret, pvrc; - struct kvm_s390_pv_sec_parm args = { - .origin = origin, - .length = length, - }; - -- return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); -+ ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc); -+ if (ret) { -+ error_setg(errp, "Failed to set secure execution parameters"); -+ if (pvrc == 0x108) { -+ error_append_hint(errp, "Please check whether the image is " -+ "correctly encrypted for this host\n"); -+ } -+ } -+ -+ return ret; - } - - /* -diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h -index 7b935e2246..5877d28ff1 100644 ---- a/target/s390x/kvm/pv.h -+++ b/target/s390x/kvm/pv.h -@@ -42,7 +42,7 @@ int s390_pv_query_info(void); - int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); - bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); --int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_prep_reset(void); - int s390_pv_verify(void); -@@ -62,7 +62,8 @@ static inline int s390_pv_query_info(void) { return 0; } - static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} - static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } 
--static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } -+static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, -+ Error **errp) { return 0; } - static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } - static inline void s390_pv_prep_reset(void) {} - static inline int s390_pv_verify(void) { return 0; } --- -2.39.3 - diff --git a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch deleted file mode 100644 index 9b3eefb..0000000 --- a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 420bf75353286324822c3bbca3b52a7a56ed668c Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:00 -0500 -Subject: [PATCH 083/101] tests: remove aio_context_acquire() tests - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [14/26] f6421037c1523bc957f3be0f4ad05571ae012dba (kmwolf/centos-qemu-kvm) - -The aio_context_acquire() API is being removed. Drop the test case that -calls the API. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - tests/unit/test-aio.c | 67 +------------------------------------------ - 1 file changed, 1 insertion(+), 66 deletions(-) - -diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c -index 337b6e4ea7..e77d86be87 100644 ---- a/tests/unit/test-aio.c -+++ b/tests/unit/test-aio.c -@@ -100,76 +100,12 @@ static void event_ready_cb(EventNotifier *e) - - /* Tests using aio_*. 
*/ - --typedef struct { -- QemuMutex start_lock; -- EventNotifier notifier; -- bool thread_acquired; --} AcquireTestData; -- --static void *test_acquire_thread(void *opaque) --{ -- AcquireTestData *data = opaque; -- -- /* Wait for other thread to let us start */ -- qemu_mutex_lock(&data->start_lock); -- qemu_mutex_unlock(&data->start_lock); -- -- /* event_notifier_set might be called either before or after -- * the main thread's call to poll(). The test case's outcome -- * should be the same in either case. -- */ -- event_notifier_set(&data->notifier); -- aio_context_acquire(ctx); -- aio_context_release(ctx); -- -- data->thread_acquired = true; /* success, we got here */ -- -- return NULL; --} -- - static void set_event_notifier(AioContext *nctx, EventNotifier *notifier, - EventNotifierHandler *handler) - { - aio_set_event_notifier(nctx, notifier, handler, NULL, NULL); - } - --static void dummy_notifier_read(EventNotifier *n) --{ -- event_notifier_test_and_clear(n); --} -- --static void test_acquire(void) --{ -- QemuThread thread; -- AcquireTestData data; -- -- /* Dummy event notifier ensures aio_poll() will block */ -- event_notifier_init(&data.notifier, false); -- set_event_notifier(ctx, &data.notifier, dummy_notifier_read); -- g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ -- -- qemu_mutex_init(&data.start_lock); -- qemu_mutex_lock(&data.start_lock); -- data.thread_acquired = false; -- -- qemu_thread_create(&thread, "test_acquire_thread", -- test_acquire_thread, -- &data, QEMU_THREAD_JOINABLE); -- -- /* Block in aio_poll(), let other thread kick us and acquire context */ -- aio_context_acquire(ctx); -- qemu_mutex_unlock(&data.start_lock); /* let the thread run */ -- g_assert(aio_poll(ctx, true)); -- g_assert(!data.thread_acquired); -- aio_context_release(ctx); -- -- qemu_thread_join(&thread); -- set_event_notifier(ctx, &data.notifier, NULL); -- event_notifier_cleanup(&data.notifier); -- -- g_assert(data.thread_acquired); --} -- - static void 
test_bh_schedule(void) - { - BHTestData data = { .n = 0 }; -@@ -879,7 +815,7 @@ static void test_worker_thread_co_enter(void) - qemu_thread_get_self(&this_thread); - co = qemu_coroutine_create(co_check_current_thread, &this_thread); - -- qemu_thread_create(&worker_thread, "test_acquire_thread", -+ qemu_thread_create(&worker_thread, "test_aio_co_enter", - test_aio_co_enter, - co, QEMU_THREAD_JOINABLE); - -@@ -899,7 +835,6 @@ int main(int argc, char **argv) - while (g_main_context_iteration(NULL, false)); - - g_test_init(&argc, &argv, NULL); -- g_test_add_func("/aio/acquire", test_acquire); - g_test_add_func("/aio/bh/schedule", test_bh_schedule); - g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); - g_test_add_func("/aio/bh/cancel", test_bh_cancel); --- -2.39.3 - diff --git a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch deleted file mode 100644 index 0afdea2..0000000 --- a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch +++ /dev/null @@ -1,46 +0,0 @@ -From bbe64d706b3cb8b10ecd22bd71cf76b21eea257f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 25 Jan 2024 17:58:03 +0100 -Subject: [PATCH 20/22] tests/unit: Bump test-replication timeout to 60 seconds - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [16/17] 200768aedee44d10aa8d199b92a9c17a9002fc3f (stefanha/centos-stream-qemu-kvm) - -We're seeing timeouts for this test on CI runs (specifically for -ubuntu-20.04-s390x-all). It doesn't fail consistently, but even the -successful runs take about 27 or 28 seconds, which is not very far from -the 30 seconds timeout. - -Bump the timeout a bit to make failure less likely even on this CI host. 
- -Signed-off-by: Kevin Wolf -Message-ID: <20240125165803.48373-1-kwolf@redhat.com> -Reviewed-by: Thomas Huth -Signed-off-by: Kevin Wolf -(cherry picked from commit 63b18312d14ac984acaf13c7c55d9baa2d61496e) -Signed-off-by: Stefan Hajnoczi ---- - tests/unit/meson.build | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/unit/meson.build b/tests/unit/meson.build -index a05d471090..28db6adea8 100644 ---- a/tests/unit/meson.build -+++ b/tests/unit/meson.build -@@ -173,7 +173,8 @@ test_env.set('G_TEST_BUILDDIR', meson.current_build_dir()) - - slow_tests = { - 'test-crypto-tlscredsx509': 45, -- 'test-crypto-tlssession': 45 -+ 'test-crypto-tlssession': 45, -+ 'test-replication': 60, - } - - foreach test_name, extra: tests --- -2.39.3 - diff --git a/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch new file mode 100644 index 0000000..d21d298 --- /dev/null +++ b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch @@ -0,0 +1,59 @@ +From b02dc1e5c0f01228053e784f9ec7ac3a47e91d7c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:36:25 -0500 +Subject: [PATCH 026/100] trace/kvm: Split address space and slot id in + trace_kvm_set_user_memory() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [26/91] 640511c4ab0ba76bb4483f6c3fb73e060d914f0a (bonzini/rhel-qemu-kvm) + +The upper 16 bits of kvm_userspace_memory_region::slot are +address space id. Parse it separately in trace_kvm_set_user_memory(). 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 72853afc638b3e28779c86dd05da2f3bb149fe2c) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 5 +++-- + accel/kvm/trace-events | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index b51e09a583..9bd235c969 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + slot->old_flags = mem.flags; + err: +- trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, +- mem.memory_size, mem.userspace_addr, ret); ++ trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, ++ mem.guest_phys_addr, mem.memory_size, ++ mem.userspace_addr, ret); + if (ret < 0) { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index a25902597b..9f599abc17 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t 
guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch b/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch deleted file mode 100644 index b51961c..0000000 --- a/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 6e4f68e9ba3fe75ca6f200f189f96bb402f0ee8e Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Wed, 24 Jan 2024 11:57:49 +0100 -Subject: [PATCH 02/20] ui/clipboard: add asserts for update and request -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 228: ui/clipboard: mark type as not available when there is no data -RH-Jira: RHEL-19629 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Gerd Hoffmann -RH-Commit: [2/2] 176b4b835fd8aa226f2fa93fd334b9384080cf21 (jmaloy/jmaloy-qemu-kvm-2) - -JIRA: https://issues.redhat.com/browse/RHEL-19629 -CVE: CVE-2023-6683 -Upstream: Merged - -ui/clipboard: add asserts for update and request - -commit 9c416582611b7495bdddb4c5456c7acb64b78938 -Author: Fiona Ebner -Date: Wed Jan 24 11:57:49 2024 +0100 - - ui/clipboard: add asserts for update and request - - Should an issue like CVE-2023-6683 ever appear again in the future, - it will be more obvious which assumption was violated. 
- - Suggested-by: Marc-André Lureau - Signed-off-by: Fiona Ebner - Reviewed-by: Marc-André Lureau - Message-ID: <20240124105749.204610-2-f.ebner@proxmox.com> - -Signed-off-by: Jon Maloy ---- - ui/clipboard.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index b3f6fa3c9e..4264884a6c 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -65,12 +65,24 @@ bool qemu_clipboard_check_serial(QemuClipboardInfo *info, bool client) - - void qemu_clipboard_update(QemuClipboardInfo *info) - { -+ uint32_t type; - QemuClipboardNotify notify = { - .type = QEMU_CLIPBOARD_UPDATE_INFO, - .info = info, - }; - assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); - -+ for (type = 0; type < QEMU_CLIPBOARD_TYPE__COUNT; type++) { -+ /* -+ * If data is missing, the clipboard owner's 'request' callback needs to -+ * be set. Otherwise, there is no way to get the clipboard data and -+ * qemu_clipboard_request() cannot be called. -+ */ -+ if (info->types[type].available && !info->types[type].data) { -+ assert(info->owner && info->owner->request); -+ } -+ } -+ - notifier_list_notify(&clipboard_notifiers, ¬ify); - - if (cbinfo[info->selection] != info) { -@@ -132,6 +144,8 @@ void qemu_clipboard_request(QemuClipboardInfo *info, - !info->owner) - return; - -+ assert(info->owner->request); -+ - info->types[type].requested = true; - info->owner->request(info, type); - } --- -2.39.3 - diff --git a/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch b/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch deleted file mode 100644 index 00c9369..0000000 --- a/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 097516bef2993d917e76d92066ca2eb067e45394 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Wed, 24 Jan 2024 11:57:48 +0100 -Subject: [PATCH 01/20] ui/clipboard: mark type as not available when there is - no data -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 228: ui/clipboard: mark type as not available when there is no data -RH-Jira: RHEL-19629 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Gerd Hoffmann -RH-Commit: [1/2] 74ded03d6376f9693733d673502219f76eab7099 (jmaloy/jmaloy-qemu-kvm-2) - -JIRA: https://issues.redhat.com/browse/RHEL-19629 -CVE: CVE-2023-6683 -Upstream: Merged - -commit 405484b29f6548c7b86549b0f961b906337aa68a -Author: Fiona Ebner -Date: Wed Jan 24 11:57:48 2024 +0100 - - ui/clipboard: mark type as not available when there is no data - - With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT - message with len=0. In qemu_clipboard_set_data(), the clipboard info - will be updated setting data to NULL (because g_memdup(data, size) - returns NULL when size is 0). If the client does not set the - VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then - the 'request' callback for the clipboard peer is not initialized. - Later, because data is NULL, qemu_clipboard_request() can be reached - via vdagent_chr_write() and vdagent_clipboard_recv_request() and - there, the clipboard owner's 'request' callback will be attempted to - be called, but that is a NULL pointer. - - In particular, this can happen when using the KRDC (22.12.3) VNC - client. - - Another scenario leading to the same issue is with two clients (say - noVNC and KRDC): - - The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and - initializes its cbpeer. - - The KRDC client does not, but triggers a vnc_client_cut_text() (note - it's not the _ext variant)). There, a new clipboard info with it as - the 'owner' is created and via qemu_clipboard_set_data() is called, - which in turn calls qemu_clipboard_update() with that info. - - In qemu_clipboard_update(), the notifier for the noVNC client will be - called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the - noVNC client. 
The 'owner' in that clipboard info is the clipboard peer - for the KRDC client, which did not initialize the 'request' function. - That sounds correct to me, it is the owner of that clipboard info. - - Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set - the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it - passes), that clipboard info is passed to qemu_clipboard_request() and - the original segfault still happens. - - Fix the issue by handling updates with size 0 differently. In - particular, mark in the clipboard info that the type is not available. - - While at it, switch to g_memdup2(), because g_memdup() is deprecated. - - Cc: qemu-stable@nongnu.org - Fixes: CVE-2023-6683 - Reported-by: Markus Frank - Suggested-by: Marc-André Lureau - Signed-off-by: Fiona Ebner - Reviewed-by: Marc-André Lureau - Tested-by: Markus Frank - Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> - -Signed-off-by: Jon Maloy ---- - ui/clipboard.c | 12 +++++++++--- - 1 file changed, 9 insertions(+), 3 deletions(-) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index 3d14bffaf8..b3f6fa3c9e 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, - } - - g_free(info->types[type].data); -- info->types[type].data = g_memdup(data, size); -- info->types[type].size = size; -- info->types[type].available = true; -+ if (size) { -+ info->types[type].data = g_memdup2(data, size); -+ info->types[type].size = size; -+ info->types[type].available = true; -+ } else { -+ info->types[type].data = NULL; -+ info->types[type].size = 0; -+ info->types[type].available = false; -+ } - - if (update) { - qemu_clipboard_update(info); --- -2.39.3 - diff --git a/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch new file mode 100644 index 0000000..f141bf1 --- /dev/null +++ 
b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch @@ -0,0 +1,62 @@ +From e185104a10a37174d13d981fa1febafbb7e651aa Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 13:49:49 +0200 +Subject: [PATCH 050/100] update-linux-headers: fix forwarding to asm-generic + headers + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [50/91] 3c98a7fe790d943bb5ff8dca1da83f5944ec3e2e (bonzini/rhel-qemu-kvm) + +After commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove +temporary directory inbetween", 2024-05-29), updating linux-headers/ +results in errors such as + + cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory + +because Loongarch does not have an asm/bitsperlong.h file and uses the +generic version. Before commit 3efc75ad9d9, the missing file would +incorrectly cause stale files to be included in linux-headers/. The files +were never committed to qemu.git, but were wrong nevertheless. The build +would just use the system version of the files, which is opposite to +the idea of importing Linux header files into QEMU's tree. + +Create forwarding headers, resembling the ones that are generated during a +kernel build by scripts/Makefile.asm-generic, if a file is only installed +under include/asm-generic/.
+ +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f084bee72e..78c0f2c43e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -119,7 +119,14 @@ for arch in $ARCHLIST; do + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ if test -f "$hdrdir/include/asm/$header"; then ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ elif test -f "$hdrdir/include/asm-generic/$header"; then ++ # not installed as , but used as such in kernel sources ++ cat <$output/linux-headers/asm-$arch/$header ++#include ++EOF ++ fi + done + + if [ $arch = mips ]; then +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch new file mode 100644 index 0000000..a75a2aa --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch @@ -0,0 +1,175 @@ +From 8d6c37ddc253f63202cc9519670c258e9d81b98e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:25:06 +0200 +Subject: [PATCH 053/100] update-linux-headers: import linux/kvm_para.h header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [53/91] 27d4db0ecec7d0b8adeba1ec85fca32eacee1009 (bonzini/rhel-qemu-kvm) + +Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h +because it includes definitions for kvmclock and for KVM CPUID +bits. 
However, other definitions for KVM hypercall values and return +codes are included in include/uapi/linux/kvm_para.h and they will be +used by SEV-SNP. + +To ensure that it is possible to include both <linux/kvm_para.h> and +"standard-headers/asm-x86/kvm_para.h" without conflicts, provide +linux/kvm_para.h as a portable header too, and forward linux-headers/ +files to those in include/standard-headers. Note that <linux/kvm_para.h> +will include architecture-specific definitions as well, but +"standard-headers/linux/kvm_para.h" will not because it can be used in +architecture-independent files. + +This could easily be extended to other architectures, but right now +they do not need any symbol in their specific kvm_para.h files. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit aa274c33c39e7de981dc195abe60e1a246c9d248) +Signed-off-by: Paolo Bonzini +--- + include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ + linux-headers/asm-x86/kvm_para.h | 1 + + linux-headers/linux/kvm_para.h | 2 ++ + scripts/update-linux-headers.sh | 22 ++++++++++++- + 4 files changed, 62 insertions(+), 1 deletion(-) + create mode 100644 include/standard-headers/linux/kvm_para.h + create mode 100644 linux-headers/asm-x86/kvm_para.h + create mode 100644 linux-headers/linux/kvm_para.h + +diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..015c166302 +--- /dev/null ++++ b/include/standard-headers/linux/kvm_para.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_KVM_PARA_H ++#define __LINUX_KVM_PARA_H ++ ++/* ++ * This header file provides a method for making a hypercall to the host ++ * Architectures should define: ++ * - kvm_hypercall0, kvm_hypercall1...
++ * - kvm_arch_para_features ++ * - kvm_para_available ++ */ ++ ++/* Return values for hypercalls */ ++#define KVM_ENOSYS 1000 ++#define KVM_EFAULT EFAULT ++#define KVM_EINVAL EINVAL ++#define KVM_E2BIG E2BIG ++#define KVM_EPERM EPERM ++#define KVM_EOPNOTSUPP 95 ++ ++#define KVM_HC_VAPIC_POLL_IRQ 1 ++#define KVM_HC_MMU_OP 2 ++#define KVM_HC_FEATURES 3 ++#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 ++#define KVM_HC_KICK_CPU 5 ++#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 ++#define KVM_HC_MIPS_EXIT_VM 7 ++#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 ++#define KVM_HC_CLOCK_PAIRING 9 ++#define KVM_HC_SEND_IPI 10 ++#define KVM_HC_SCHED_YIELD 11 ++#define KVM_HC_MAP_GPA_RANGE 12 ++ ++/* ++ * hypercalls use architecture specific ++ */ ++ ++#endif /* __LINUX_KVM_PARA_H */ +diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h +new file mode 100644 +index 0000000000..1d3e0e0b07 +--- /dev/null ++++ b/linux-headers/asm-x86/kvm_para.h +@@ -0,0 +1 @@ ++#include "standard-headers/asm-x86/kvm_para.h" +diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..6a1e672259 +--- /dev/null ++++ b/linux-headers/linux/kvm_para.h +@@ -0,0 +1,2 @@ ++#include "standard-headers/linux/kvm_para.h" ++#include +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 90759dcfe0..64d1989961 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -64,6 +64,7 @@ cp_portable() { + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ + -e 'asm/setup_data.h' \ ++ -e 'asm/kvm_para.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -71,6 +72,15 @@ cp_portable() { + fi + + header=$(basename "$f"); ++ ++ if test -z "$arch"; then ++ # Let users of include/standard-headers/linux/ headers pick the ++ # asm-* header that they care about ++ arch_cmd='/]*\)>/d' ++ else ++ arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' ++ fi ++ + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ + -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ +@@ -79,7 +89,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ +- -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ ++ -e "$arch_cmd" \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -159,7 +169,12 @@ EOF + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cat <$output/linux-headers/asm-$arch/kvm_para.h ++#include "standard-headers/asm-$arch/kvm_para.h" ++EOF ++ + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +@@ -209,6 +224,10 @@ if [ -d "$linux/LICENSES" ]; then + done + fi + ++cat <$output/linux-headers/linux/kvm_para.h ++#include "standard-headers/linux/kvm_para.h" ++#include ++EOF + cat <$output/linux-headers/linux/virtio_config.h + #include "standard-headers/linux/virtio_config.h" + EOF +@@ -231,6 +250,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/kvm_para.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + 
"$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch new file mode 100644 index 0000000..cb0a4d4 --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch @@ -0,0 +1,95 @@ +From 00e250d9df1949d363758a34e3f46d8c71be054f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:16:55 +0200 +Subject: [PATCH 051/100] update-linux-headers: move pvpanic.h to correct + directory + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [51/91] 10efff5bbcb867ba34f3f9ff8045381ea96f94c7 (bonzini/rhel-qemu-kvm) + +Linux has , not . Use the same +directory for QEMU's include/standard-headers/ copy. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b8116f4cbaa0f64bb07564f20b3b5219e23c8bff) +Signed-off-by: Paolo Bonzini +--- + hw/misc/pvpanic-isa.c | 2 +- + hw/misc/pvpanic-pci.c | 2 +- + hw/misc/pvpanic.c | 2 +- + include/standard-headers/{linux => misc}/pvpanic.h | 0 + scripts/update-linux-headers.sh | 6 ++++-- + 5 files changed, 7 insertions(+), 5 deletions(-) + rename include/standard-headers/{linux => misc}/pvpanic.h (100%) + +diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c +index ccec50f61b..b4f84c4110 100644 +--- a/hw/misc/pvpanic-isa.c ++++ b/hw/misc/pvpanic-isa.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/isa/isa.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + #include "hw/acpi/acpi_aml_interface.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) +diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c +index 
83be95d0d2..4d44a881da 100644 +--- a/hw/misc/pvpanic-pci.c ++++ b/hw/misc/pvpanic-pci.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/pci/pci_device.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE) + +diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c +index 1540e9091a..80289ecf5f 100644 +--- a/hw/misc/pvpanic.c ++++ b/hw/misc/pvpanic.c +@@ -21,7 +21,7 @@ + #include "hw/qdev-properties.h" + #include "hw/misc/pvpanic.h" + #include "qom/object.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + static void handle_event(int event) + { +diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h +similarity index 100% +rename from include/standard-headers/linux/pvpanic.h +rename to include/standard-headers/misc/pvpanic.h +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 78c0f2c43e..90759dcfe0 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -232,10 +232,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/vhost_types.h" \ +- "$hdrdir/include/linux/sysinfo.h" \ +- "$hdrdir/include/misc/pvpanic.h"; do ++ "$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done ++mkdir -p "$output/include/standard-headers/misc" ++cp_portable "$hdrdir/include/misc/pvpanic.h" \ ++ "$output/include/standard-headers/misc" + mkdir -p "$output/include/standard-headers/drm" + cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" +-- +2.39.3 + diff --git a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch deleted file mode 100644 index 1f1e870..0000000 --- 
a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch +++ /dev/null @@ -1,175 +0,0 @@ -From de167878ec4ca159cc6def5134c91c5fe9b5ab96 Mon Sep 17 00:00:00 2001 -From: Yi Liu -Date: Tue, 21 Nov 2023 16:44:01 +0800 -Subject: [PATCH 022/101] util/char_dev: Add open_cdev() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [21/67] 72bf9ec3ccc9959626235bd270ec84caa4cee435 (eauger1/centos-qemu-kvm) - -/dev/vfio/devices/vfioX may not exist. In that case it is still possible -to open /dev/char/$major:$minor instead. Add helper function to abstract -the cdev open. - -Suggested-by: Jason Gunthorpe -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit d6b5c4c1b516a8176b74ec35a0af8cf89b04b6c1) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 2 + - include/qemu/chardev_open.h | 16 ++++++++ - util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++ - util/meson.build | 1 + - 4 files changed, 100 insertions(+) - create mode 100644 include/qemu/chardev_open.h - create mode 100644 util/chardev_open.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index a5a446914a..ca70bb4e64 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan - S: Supported - F: backends/iommufd.c - F: include/sysemu/iommufd.h -+F: include/qemu/chardev_open.h -+F: util/chardev_open.c - - vhost - M: Michael S. Tsirkin -diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h -new file mode 100644 -index 0000000000..64e8fcfdcb ---- /dev/null -+++ b/include/qemu/chardev_open.h -@@ -0,0 +1,16 @@ -+/* -+ * QEMU Chardev Helper -+ * -+ * Copyright (C) 2023 Intel Corporation. 
-+ * -+ * Authors: Yi Liu -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. See -+ * the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_CHARDEV_OPEN_H -+#define QEMU_CHARDEV_OPEN_H -+ -+int open_cdev(const char *devpath, dev_t cdev); -+#endif -diff --git a/util/chardev_open.c b/util/chardev_open.c -new file mode 100644 -index 0000000000..f776429788 ---- /dev/null -+++ b/util/chardev_open.c -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved. -+ * Copyright (C) 2023 Intel Corporation. -+ * -+ * This software is available to you under a choice of one of two -+ * licenses. You may choose to be licensed under the terms of the GNU -+ * General Public License (GPL) Version 2, available from the file -+ * COPYING in the main directory of this source tree, or the -+ * OpenIB.org BSD license below: -+ * -+ * Redistribution and use in source and binary forms, with or -+ * without modification, are permitted provided that the following -+ * conditions are met: -+ * -+ * - Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials -+ * provided with the distribution. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. 
-+ * -+ * Authors: Yi Liu -+ * -+ * Copied from -+ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c -+ * -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/chardev_open.h" -+ -+static int open_cdev_internal(const char *path, dev_t cdev) -+{ -+ struct stat st; -+ int fd; -+ -+ fd = qemu_open_old(path, O_RDWR); -+ if (fd == -1) { -+ return -1; -+ } -+ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) || -+ (cdev != 0 && st.st_rdev != cdev)) { -+ close(fd); -+ return -1; -+ } -+ return fd; -+} -+ -+static int open_cdev_robust(dev_t cdev) -+{ -+ g_autofree char *devpath = NULL; -+ -+ /* -+ * This assumes that udev is being used and is creating the /dev/char/ -+ * symlinks. -+ */ -+ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev)); -+ return open_cdev_internal(devpath, cdev); -+} -+ -+int open_cdev(const char *devpath, dev_t cdev) -+{ -+ int fd; -+ -+ fd = open_cdev_internal(devpath, cdev); -+ if (fd == -1 && cdev != 0) { -+ return open_cdev_robust(cdev); -+ } -+ return fd; -+} -diff --git a/util/meson.build b/util/meson.build -index c2322ef6e7..174c133368 100644 ---- a/util/meson.build -+++ b/util/meson.build -@@ -108,6 +108,7 @@ if have_block - util_ss.add(files('filemonitor-stub.c')) - endif - util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) -+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c')) - endif - - if cpu == 'aarch64' --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch deleted file mode 100644 index 040288f..0000000 --- a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch +++ /dev/null @@ -1,154 +0,0 @@ -From f554328f6f4702743af71befcb83c25c36e4fa4d Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:25 +0800 -Subject: [PATCH 046/101] vfio: Introduce a helper function to initialize - VFIODevice -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [45/67] 73225f394540bf5aeb70c0bdb89771f19a6d286d (eauger1/centos-qemu-kvm) - -Introduce a helper function to replace the common code to initialize -VFIODevice in pci, platform, ap and ccw VFIO device. - -No functional change intended. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6106a329141af7d47bdc3346ce9820d4714e0e5d) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 8 ++------ - hw/vfio/ccw.c | 8 ++------ - hw/vfio/helpers.c | 11 +++++++++++ - hw/vfio/pci.c | 6 ++---- - hw/vfio/platform.c | 6 ++---- - include/hw/vfio/vfio-common.h | 2 ++ - 6 files changed, 21 insertions(+), 20 deletions(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index 95fe7cd98b..e157aa1ff7 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); - VFIODevice *vbasedev = &vapdev->vdev; - -- vbasedev->type = VFIO_DEVICE_TYPE_AP; -- vbasedev->ops = &vfio_ap_ops; -- vbasedev->dev = DEVICE(vapdev); -- vbasedev->fd = -1; -- - /* - * vfio-ap devices operate in a way compatible with discarding of - * memory in RAM blocks, as no pages are pinned in the host. - * This needs to be set before vfio_get_device() for vfio common to - * handle ram_block_discard_disable(). 
- */ -- vbasedev->ram_block_discard_allowed = true; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, -+ DEVICE(vapdev), true); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 6305a4c1b8..90e4a53437 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj) - VFIOCCWDevice *vcdev = VFIO_CCW(obj); - VFIODevice *vbasedev = &vcdev->vdev; - -- vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->ops = &vfio_ccw_ops; -- vbasedev->dev = DEVICE(vcdev); -- vbasedev->fd = -1; -- - /* - * All vfio-ccw devices are believed to operate in a way compatible with - * discarding of memory in RAM blocks, ie. pages pinned in the host are -@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj) - * needs to be set before vfio_get_device() for vfio common to handle - * ram_block_discard_disable(). - */ -- vbasedev->ram_block_discard_allowed = true; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, -+ DEVICE(vcdev), true); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c -index 3592c3d54e..6789870802 100644 ---- a/hw/vfio/helpers.c -+++ b/hw/vfio/helpers.c -@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) - } - vbasedev->fd = fd; - } -+ -+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, -+ DeviceState *dev, bool ram_discard) -+{ -+ vbasedev->type = type; -+ vbasedev->ops = ops; -+ vbasedev->dev = dev; -+ vbasedev->fd = -1; -+ -+ vbasedev->ram_block_discard_allowed = ram_discard; -+} -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 3f5900cc46..83c3238608 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3353,10 +3353,8 @@ static void vfio_instance_init(Object *obj) - vdev->host.slot = ~0U; - vdev->host.function = ~0U; - -- vbasedev->type = VFIO_DEVICE_TYPE_PCI; -- vbasedev->ops = &vfio_pci_ops; -- vbasedev->dev = DEVICE(vdev); -- vbasedev->fd 
= -1; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, -+ DEVICE(vdev), false); - - vdev->nv_gpudirect_clique = 0xFF; - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 506eb8193f..a8d9b7da63 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj) - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); - VFIODevice *vbasedev = &vdev->vbasedev; - -- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -- vbasedev->ops = &vfio_platform_ops; -- vbasedev->dev = DEVICE(vdev); -- vbasedev->fd = -1; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, -+ DEVICE(vdev), false); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index efcba19f66..b8aa8a5495 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -257,4 +257,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - /* Returns 0 on success, or a negative errno. 
*/ - int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); - void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); -+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, -+ DeviceState *dev, bool ram_discard); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch deleted file mode 100644 index d41e8fb..0000000 --- a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 7f392385d1b865904eae4b6681e3e7a87eb3af3d Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:27 +0800 -Subject: [PATCH 002/101] vfio: Introduce base object for VFIOContainer and - targeted interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [1/67] e63af50c2cb94f286b2d91f58c2d19dd862e019d (eauger1/centos-qemu-kvm) - -Introduce a dumb VFIOContainerBase object and its targeted interface. -This is willingly not a QOM object because we don't want it to be -visible from the user interface. The VFIOContainerBase will be -smoothly populated in subsequent patches as well as interfaces. - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit f61dddd73232e3d82d560d1e1bca120446021f2f) -Signed-off-by: Eric Auger ---- - include/hw/vfio/vfio-common.h | 8 ++--- - include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++ - 2 files changed, 52 insertions(+), 6 deletions(-) - create mode 100644 include/hw/vfio/vfio-container-base.h - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index a4a22accb9..586d153c12 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -30,6 +30,7 @@ - #include - #endif - #include "sysemu/sysemu.h" -+#include "hw/vfio/vfio-container-base.h" - - #define VFIO_MSG_PREFIX "vfio %s: " - -@@ -81,6 +82,7 @@ typedef struct VFIOAddressSpace { - struct VFIOGroup; - - typedef struct VFIOContainer { -+ VFIOContainerBase bcontainer; - VFIOAddressSpace *space; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener listener; -@@ -201,12 +203,6 @@ typedef struct VFIODisplay { - } dmabuf; - } VFIODisplay; - --typedef struct { -- unsigned long *bitmap; -- hwaddr size; -- hwaddr pages; --} VFIOBitmap; -- - VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -new file mode 100644 -index 0000000000..1d6daaea5d ---- /dev/null -+++ b/include/hw/vfio/vfio-container-base.h -@@ -0,0 +1,50 @@ -+/* -+ * VFIO BASE CONTAINER -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H -+#define HW_VFIO_VFIO_CONTAINER_BASE_H -+ -+#include "exec/memory.h" -+ -+typedef struct VFIODevice VFIODevice; -+typedef struct VFIOIOMMUOps VFIOIOMMUOps; -+ -+typedef struct { -+ unsigned long *bitmap; -+ hwaddr size; -+ hwaddr pages; -+} VFIOBitmap; -+ -+/* -+ * This is the base object for vfio container backends -+ */ -+typedef struct VFIOContainerBase { -+ const VFIOIOMMUOps *ops; -+} VFIOContainerBase; -+ -+struct VFIOIOMMUOps { -+ /* basic feature */ -+ int (*dma_map)(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly); -+ int (*dma_unmap)(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb); -+ int (*attach_device)(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp); -+ void (*detach_device)(VFIODevice *vbasedev); -+ /* migration feature */ -+ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); -+ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size); -+}; -+#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch deleted file mode 100644 index 03fb220..0000000 --- a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch +++ /dev/null @@ -1,276 +0,0 @@ -From 84b15fad1af781d06d0206d362de0801d7a18d0b Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:17 +0800 -Subject: [PATCH 038/101] vfio: Make VFIOContainerBase poiner parameter const - in VFIOIOMMUOps callbacks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: 
RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [37/67] 95eb9edc7fcfefbd4b075f6f04941ed4a19ff87d (eauger1/centos-qemu-kvm) - -Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase poiner, -those callbacks only need read access to the sub object of VFIOContainerBase. -So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const -in these callbacks. - -Local functions called by those callbacks also need same changes to avoid -build error. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4517c33c31d392f08fa96a9db911da1e3507be94) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 9 +++---- - hw/vfio/container-base.c | 2 +- - hw/vfio/container.c | 34 ++++++++++++++------------- - hw/vfio/iommufd.c | 8 +++---- - include/hw/vfio/vfio-common.h | 12 ++++++---- - include/hw/vfio/vfio-container-base.h | 12 ++++++---- - 6 files changed, 42 insertions(+), 35 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 6569732b7a..08a3e57672 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) - return true; - } - --bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) -+bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) - { - VFIODevice *vbasedev; - -@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) - * Check if all VFIO devices are running and migration is active, which is - * essentially equivalent to the migration being in pre-copy phase. 
- */ --bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) -+bool -+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) - { - VFIODevice *vbasedev; - -@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, - return 0; - } - --int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - return 0; - } - --int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, -+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) - { - bool all_device_dirty_tracking = -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index eee2dcfe76..1ffd25bbfa 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); - } - --int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size) - { -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 1dbf9b9a17..b22feb8ded 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -61,11 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) - } - } - --static int vfio_dma_unmap_bitmap(VFIOContainer *container, -+static int vfio_dma_unmap_bitmap(const VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ const VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_type1_dma_unmap 
*unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; -@@ -117,11 +117,12 @@ unmap_exit: - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ --static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb) -+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, -@@ -174,11 +175,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - return 0; - } - --static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, -+static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, -@@ -207,11 +208,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, - return -errno; - } - --static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -- bool start) -+static int -+vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, -+ bool start) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), -@@ -233,12 +235,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - 
return ret; - } - --static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 5accd26484..87a561c545 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -26,10 +26,10 @@ - #include "qemu/chardev_open.h" - #include "pci.h" - --static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, -+static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) - { -- VFIOIOMMUFDContainer *container = -+ const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - - return iommufd_backend_map_dma(container->be, -@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, - iova, size, vaddr, readonly); - } - --static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, -+static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -- VFIOIOMMUFDContainer *container = -+ const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - - /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 697bf24a35..efcba19f66 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -244,13 +244,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - int 
vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); --bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); --bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); --int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+bool -+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer); -+bool -+vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer); -+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size); --int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, -- uint64_t size, ram_addr_t ram_addr); -+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, -+ uint64_t size, ram_addr_t ram_addr); - - /* Returns 0 on success, or a negative errno. */ - int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 45bb19c767..2ae297ccda 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, - MemoryRegionSection *section); - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start); --int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); - -@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - struct VFIOIOMMUOps { - /* basic feature */ -- int (*dma_map)(VFIOContainerBase *bcontainer, -+ int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); -- int (*dma_unmap)(VFIOContainerBase *bcontainer, -+ int (*dma_unmap)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - 
IOMMUTLBEntry *iotlb); - int (*attach_device)(const char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp); - void (*detach_device)(VFIODevice *vbasedev); - /* migration feature */ -- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); -- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, -+ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, -+ bool start); -+ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); - /* PCI specific */ - int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch deleted file mode 100644 index ffd8b9f..0000000 --- a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 57bdfc821d6f4b4f9c6b1ff05bf0114e5cabc77e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:13 +0800 -Subject: [PATCH 034/101] vfio/ap: Allow the selection of a given iommu backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [33/67] a12bb86e5b627ccf246fb9ce60820595589ff8e5 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-ap device: - -if the user wants to use the legacy backend, it shall not -link the vfio-ap device with any iommufd object: - - -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX - -This is called the legacy mode/backend. 
- -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-ap device options: - - -object iommufd,id=iommufd0 - -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 336f308958d598f3db351bb7d94cc57b4b2d448d) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index bbf69ff55a..80629609ae 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -11,10 +11,12 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - #include "qapi/error.h" - #include "hw/vfio/vfio-common.h" -+#include "sysemu/iommufd.h" - #include "hw/s390x/ap-device.h" - #include "qemu/error-report.h" - #include "qemu/event_notifier.h" -@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev) - - static Property vfio_ap_properties[] = { - DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch deleted file mode 100644 index 1055329..0000000 --- a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch +++ /dev/null @@ -1,87 +0,0 @@ -From db09b7c60c01ee75d602261ee959a96fa0d89d68 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:14 +0800 -Subject: [PATCH 035/101] vfio/ap: Make vfio cdev pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 
8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [34/67] aaafa6088a9b0302d53aa539f67792d02ea0f663 (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 5e7ba401b71d18544a3e44b2a58b9e63fd5148d5) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index 80629609ae..f180e4a32a 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); - VFIODevice *vbasedev = &vapdev->vdev; - -- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -+ if (vfio_device_get_name(vbasedev, errp) < 0) { -+ return; -+ } -+ - vbasedev->ops = &vfio_ap_ops; - vbasedev->type = VFIO_DEVICE_TYPE_AP; - vbasedev->dev = dev; -@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = { - .unmigratable = 1, - }; - -+static void vfio_ap_instance_init(Object *obj) -+{ -+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); -+ -+ vapdev->vdev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp); -+} -+#endif -+ - static void vfio_ap_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, vfio_ap_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd); -+#endif - 
dc->vmsd = &vfio_ap_vmstate; - dc->desc = "VFIO-based AP device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); -@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = { - .name = TYPE_VFIO_AP_DEVICE, - .parent = TYPE_AP_DEVICE, - .instance_size = sizeof(VFIOAPDevice), -+ .instance_init = vfio_ap_instance_init, - .class_init = vfio_ap_class_init, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch deleted file mode 100644 index ed60920..0000000 --- a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch +++ /dev/null @@ -1,81 +0,0 @@ -From b8630ecb698e31311089ba4e224d5e2c08c8e665 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:23 +0800 -Subject: [PATCH 044/101] vfio/ap: Move VFIODevice initializations in - vfio_ap_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [43/67] 95a527f649b28c5c78903e99735107667e8468b1 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_ap_realize, -move all of them in vfio_ap_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit cbbcc2f1706aa1a08637142744d2f5f6515ac93f) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 26 +++++++++++++------------- - 1 file changed, 13 insertions(+), 13 deletions(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index f180e4a32a..95fe7cd98b 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - return; - } - -- vbasedev->ops = &vfio_ap_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_AP; -- vbasedev->dev = dev; -- -- /* -- * vfio-ap devices operate in a way compatible with discarding of -- * memory in RAM blocks, as no pages are pinned in the host. -- * This needs to be set before vfio_get_device() for vfio common to -- * handle ram_block_discard_disable(). -- */ -- vapdev->vdev.ram_block_discard_allowed = true; -- - ret = vfio_attach_device(vbasedev->name, vbasedev, - &address_space_memory, errp); - if (ret) { -@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = { - static void vfio_ap_instance_init(Object *obj) - { - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); -+ VFIODevice *vbasedev = &vapdev->vdev; - -- vapdev->vdev.fd = -1; -+ vbasedev->type = VFIO_DEVICE_TYPE_AP; -+ vbasedev->ops = &vfio_ap_ops; -+ vbasedev->dev = DEVICE(vapdev); -+ vbasedev->fd = -1; -+ -+ /* -+ * vfio-ap devices operate in a way compatible with discarding of -+ * memory in RAM blocks, as no pages are pinned in the host. -+ * This needs to be set before vfio_get_device() for vfio common to -+ * handle ram_block_discard_disable(). 
-+ */ -+ vbasedev->ram_block_discard_allowed = true; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch deleted file mode 100644 index ff64a91..0000000 --- a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 732115c80eb0dd672925a0737e09643d8a889abd Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:15 +0800 -Subject: [PATCH 036/101] vfio/ccw: Allow the selection of a given iommu - backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [35/67] 1701de023a9f3b3f0420689bf851e11aee88800d (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-ccw device: - -If the user wants to use the legacy backend, it shall not -link the vfio-ccw device with any iommufd object: - - -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX - -This is called the legacy mode/backend. 
- -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-ccw device options: - - -object iommufd,id=iommufd0 - -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit e70f971a6c1230138843d7ab82267e4a5aaf6bda) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index d857bb8d0f..d2d58bb677 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -15,12 +15,14 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - #include - - #include "qapi/error.h" - #include "hw/vfio/vfio-common.h" -+#include "sysemu/iommufd.h" - #include "hw/s390x/s390-ccw.h" - #include "hw/s390x/vfio-ccw.h" - #include "hw/qdev-properties.h" -@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev) - static Property vfio_ccw_properties[] = { - DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), - DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch deleted file mode 100644 index 6c91d85..0000000 --- a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 0ff08afdec19f4decaf750fa7d158e0ea498ff28 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:16 +0800 -Subject: [PATCH 037/101] vfio/ccw: Make vfio cdev 
pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [36/67] cc0d8f51cffa5d5a7aebc2334b908b9877179ae7 (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 909a6254edaa8d0b0e3f1c0a623862e73d1842e9) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 25 ++++++++++++++++++++++--- - 1 file changed, 22 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index d2d58bb677..2afdf17dbe 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - } - } - -+ if (vfio_device_get_name(vbasedev, errp) < 0) { -+ return; -+ } -+ - vbasedev->ops = &vfio_ccw_ops; - vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, -- vcdev->cdev.hostid.ssid, -- vcdev->cdev.hostid.devid); - vbasedev->dev = dev; - - /* -@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = { - .unmigratable = 1, - }; - -+static void vfio_ccw_instance_init(Object *obj) -+{ -+ VFIOCCWDevice *vcdev = VFIO_CCW(obj); -+ -+ vcdev->vdev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp); -+} -+#endif -+ - static void vfio_ccw_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); - 
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); - - device_class_set_props(dc, vfio_ccw_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd); -+#endif - dc->vmsd = &vfio_ccw_vmstate; - dc->desc = "VFIO-based subchannel assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); -@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = { - .name = TYPE_VFIO_CCW, - .parent = TYPE_S390_CCW, - .instance_size = sizeof(VFIOCCWDevice), -+ .instance_init = vfio_ccw_instance_init, - .class_init = vfio_ccw_class_init, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch deleted file mode 100644 index 95b85f9..0000000 --- a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 2ef1c050722115247962e3cd4d8fcf73727e597e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:24 +0800 -Subject: [PATCH 045/101] vfio/ccw: Move VFIODevice initializations in - vfio_ccw_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [44/67] 3345ed58f491aba8fd51bcc172af267ae53e6c8c (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_ccw_realize, -move all of them in vfio_ccw_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit c12b55ad6f9d3b4792b590e9211bd7319e4a2d70) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 30 +++++++++++++++--------------- - 1 file changed, 15 insertions(+), 15 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 2afdf17dbe..6305a4c1b8 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - return; - } - -- vbasedev->ops = &vfio_ccw_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->dev = dev; -- -- /* -- * All vfio-ccw devices are believed to operate in a way compatible with -- * discarding of memory in RAM blocks, ie. pages pinned in the host are -- * in the current working set of the guest driver and therefore never -- * overlap e.g., with pages available to the guest balloon driver. This -- * needs to be set before vfio_get_device() for vfio common to handle -- * ram_block_discard_disable(). -- */ -- vbasedev->ram_block_discard_allowed = true; -- - ret = vfio_attach_device(cdev->mdevid, vbasedev, - &address_space_memory, errp); - if (ret) { -@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = { - static void vfio_ccw_instance_init(Object *obj) - { - VFIOCCWDevice *vcdev = VFIO_CCW(obj); -+ VFIODevice *vbasedev = &vcdev->vdev; -+ -+ vbasedev->type = VFIO_DEVICE_TYPE_CCW; -+ vbasedev->ops = &vfio_ccw_ops; -+ vbasedev->dev = DEVICE(vcdev); -+ vbasedev->fd = -1; - -- vcdev->vdev.fd = -1; -+ /* -+ * All vfio-ccw devices are believed to operate in a way compatible with -+ * discarding of memory in RAM blocks, ie. pages pinned in the host are -+ * in the current working set of the guest driver and therefore never -+ * overlap e.g., with pages available to the guest balloon driver. 
This -+ * needs to be set before vfio_get_device() for vfio common to handle -+ * ram_block_discard_disable(). -+ */ -+ vbasedev->ram_block_discard_allowed = true; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch deleted file mode 100644 index 8615b6d..0000000 --- a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 7de36998dd6177380e46b8c5f3a91c3fad75483c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:30 +0800 -Subject: [PATCH 005/101] vfio/common: Introduce vfio_container_init/destroy - helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [4/67] 8287f687ef19cd84afede1e8f3b16ac3caf29a1d (eauger1/centos-qemu-kvm) - -This adds two helper functions vfio_container_init/destroy which will be -used by both legacy and iommufd containers to do base container specific -initialization and release. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit ed2f7f80170251e7cdd2965a13ee97527d1fbec8) -Signed-off-by: Eric Auger ---- - hw/vfio/container-base.c | 9 +++++++++ - hw/vfio/container.c | 4 +++- - include/hw/vfio/vfio-container-base.h | 4 ++++ - 3 files changed, 16 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 55d3a35fa4..e929435751 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - g_assert(bcontainer->ops->dma_unmap); - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } -+ -+void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) -+{ -+ bcontainer->ops = ops; -+} -+ -+void vfio_container_destroy(VFIOContainerBase *bcontainer) -+{ -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c04df26323..32a0251dd1 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -559,7 +559,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -- bcontainer->ops = &vfio_legacy_ops; -+ vfio_container_init(bcontainer, &vfio_legacy_ops); - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -661,6 +661,7 @@ put_space_exit: - static void vfio_disconnect_container(VFIOGroup *group) - { - VFIOContainer *container = group->container; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - - QLIST_REMOVE(group, container_next); - group->container = NULL; -@@ -695,6 +696,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } -+ vfio_container_destroy(bcontainer); - - trace_vfio_disconnect_container(container->fd); - close(container->fd); 
-diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 56b033f59f..577f52ccbc 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); - -+void vfio_container_init(VFIOContainerBase *bcontainer, -+ const VFIOIOMMUOps *ops); -+void vfio_container_destroy(VFIOContainerBase *bcontainer); -+ - struct VFIOIOMMUOps { - /* basic feature */ - int (*dma_map)(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch deleted file mode 100644 index eec555b..0000000 --- a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch +++ /dev/null @@ -1,221 +0,0 @@ -From 36f4005c3dbb4c8b63a975494c75281de51c25f9 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:31 +0800 -Subject: [PATCH 006/101] vfio/common: Move giommu_list in base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [5/67] ba5898e96c16c7f6e8108ae461b454d3c8c35404 (eauger1/centos-qemu-kvm) - -Move the giommu_list field in the base container and store -the base container in the VFIOGuestIOMMU. - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dddf83ab99eb832c449249397a1c302c6ed746bf) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 17 +++++++++++------ - hw/vfio/container-base.c | 9 +++++++++ - hw/vfio/container.c | 8 -------- - include/hw/vfio/vfio-common.h | 9 --------- - include/hw/vfio/vfio-container-base.h | 9 +++++++++ - 5 files changed, 29 insertions(+), 23 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index e610771888..43580bcc43 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -- VFIOContainerBase *bcontainer = &giommu->container->bcontainer; -+ VFIOContainerBase *bcontainer = giommu->bcontainer; - hwaddr iova = iotlb->iova + giommu->iommu_offset; - void *vaddr; - int ret; -@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - giommu->iommu_mr = iommu_mr; - giommu->iommu_offset = section->offset_within_address_space - - section->offset_within_region; -- giommu->container = container; -+ giommu->bcontainer = bcontainer; - llend = int128_add(int128_make64(section->offset_within_region), - section->size); - llend = int128_sub(llend, int128_one()); -@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - g_free(giommu); - goto fail; - } -- 
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); -+ QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next); - memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); - - return; -@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { -+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - memory_region_unregister_iommu_notifier(section->mr, -@@ -1206,7 +1208,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - vfio_giommu_dirty_notifier *gdn = container_of(n, - vfio_giommu_dirty_notifier, n); - VFIOGuestIOMMU *giommu = gdn->giommu; -- VFIOContainer *container = giommu->container; -+ VFIOContainerBase *bcontainer = giommu->bcontainer; -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova = iotlb->iova + giommu->iommu_offset; - ram_addr_t translated_addr; - int ret = -EINVAL; -@@ -1284,12 +1288,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, - static int vfio_sync_dirty_bitmap(VFIOContainer *container, - MemoryRegionSection *section) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - ram_addr_t ram_addr; - - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { -+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if 
(MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - Int128 llend; -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index e929435751..20bcb9669a 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; -+ QLIST_INIT(&bcontainer->giommu_list); - } - - void vfio_container_destroy(VFIOContainerBase *bcontainer) - { -+ VFIOGuestIOMMU *giommu, *tmp; -+ -+ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { -+ memory_region_unregister_iommu_notifier( -+ MEMORY_REGION(giommu->iommu_mr), &giommu->n); -+ QLIST_REMOVE(giommu, giommu_next); -+ g_free(giommu); -+ } - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 32a0251dd1..133d3c8f5c 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -556,7 +556,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->dirty_pages_supported = false; - container->dma_max_mappings = 0; - container->iova_ranges = NULL; -- QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, &vfio_legacy_ops); -@@ -686,16 +685,9 @@ static void vfio_disconnect_container(VFIOGroup *group) - - if (QLIST_EMPTY(&container->group_list)) { - VFIOAddressSpace *space = container->space; -- VFIOGuestIOMMU *giommu, *tmp; - - QLIST_REMOVE(container, next); - -- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { -- memory_region_unregister_iommu_notifier( -- MEMORY_REGION(giommu->iommu_mr), &giommu->n); -- QLIST_REMOVE(giommu, giommu_next); -- g_free(giommu); -- } - vfio_container_destroy(bcontainer); - - trace_vfio_disconnect_container(container->fd); -diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h -index 24a26345e5..6be082b8f2 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -95,7 +95,6 @@ typedef struct VFIOContainer { - uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; - unsigned int dma_max_mappings; -- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -@@ -104,14 +103,6 @@ typedef struct VFIOContainer { - GList *iova_ranges; - } VFIOContainer; - --typedef struct VFIOGuestIOMMU { -- VFIOContainer *container; -- IOMMUMemoryRegion *iommu_mr; -- hwaddr iommu_offset; -- IOMMUNotifier n; -- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; --} VFIOGuestIOMMU; -- - typedef struct VFIORamDiscardListener { - VFIOContainer *container; - MemoryRegion *mr; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 577f52ccbc..a11aec5755 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -29,8 +29,17 @@ typedef struct { - */ - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; -+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - } VFIOContainerBase; - -+typedef struct VFIOGuestIOMMU { -+ VFIOContainerBase *bcontainer; -+ IOMMUMemoryRegion *iommu_mr; -+ hwaddr iommu_offset; -+ IOMMUNotifier n; -+ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; -+} VFIOGuestIOMMU; -+ - int vfio_container_dma_map(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch deleted file mode 100644 index 261807a..0000000 --- a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e9476ee64edd81fafd409fb3ceaad80668446bff Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 
21 Nov 2023 16:44:02 +0800 -Subject: [PATCH 023/101] vfio/common: return early if space isn't empty -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [22/67] 239c21ae7cddc8efabc041b9c7774f15b4964631 (eauger1/centos-qemu-kvm) - -This is a trivial optimization. If there is active container in space, -vfio_reset_handler will never be unregistered. So revert the check of -space->containers and return early. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 1eae5b7bd3ddd03b5591e9122b011c6520064a5a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 572ae7c934..934f4f5446 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1462,10 +1462,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) - - void vfio_put_address_space(VFIOAddressSpace *space) - { -- if (QLIST_EMPTY(&space->containers)) { -- QLIST_REMOVE(space, list); -- g_free(space); -+ if (!QLIST_EMPTY(&space->containers)) { -+ return; - } -+ -+ QLIST_REMOVE(space, list); -+ g_free(space); -+ - if (QLIST_EMPTY(&vfio_address_spaces)) { - qemu_unregister_reset(vfio_reset_handler, NULL); - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch deleted file mode 100644 index 62caf8a..0000000 --- a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch +++ /dev/null @@ -1,257 +0,0 @@ -From facad966c42b1ec38b12e45f2b84bd059542b60c Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 
15:12:35 +0800 -Subject: [PATCH 010/101] vfio/container: Convert functions to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [9/67] a0002d6e9cb0ca76e3e2f25208ecba22dd9f9a88 (eauger1/centos-qemu-kvm) - -In the prospect to get rid of VFIOContainer refs -in common.c lets convert misc functions to use the base -container object instead: - -vfio_devices_all_dirty_tracking -vfio_devices_all_device_dirty_tracking -vfio_devices_all_running_and_mig_active -vfio_devices_query_dirty_bitmap -vfio_get_dirty_bitmap - -Signed-off-by: Eric Auger -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit e1cac6b203f45b5322e831e8d50edfdf18609b09) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 42 +++++++++++++++-------------------- - hw/vfio/container.c | 6 ++--- - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 9 ++++---- - 4 files changed, 26 insertions(+), 33 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 9415395ed9..cf6618f6ed 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) - migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; - } - --static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) -+static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - return true; - } - --bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) -+bool 
vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { -@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - * Check if all VFIO devices are running and migration is active, which is - * essentially equivalent to the migration being in pre-copy phase. - */ --bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) -+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - if (!migration_is_active(migrate_get_current())) { -@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - int ret; - -- if (vfio_devices_all_device_dirty_tracking(container)) { -+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { - ret = vfio_devices_dma_logging_start(container); - } else { - ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - int ret = 0; - -- if (vfio_devices_all_device_dirty_tracking(container)) { -+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { - vfio_devices_dma_logging_stop(container); - } else { - ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, - return 0; - } - --int vfio_devices_query_dirty_bitmap(VFIOContainer *container, -+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -- VFIOContainerBase *bcontainer = 
&container->bcontainer; - VFIODevice *vbasedev; - int ret; - -@@ -1165,17 +1161,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, - return 0; - } - --int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, -+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) - { - bool all_device_dirty_tracking = -- vfio_devices_all_device_dirty_tracking(container); -+ vfio_devices_all_device_dirty_tracking(bcontainer); - uint64_t dirty_pages; - VFIOBitmap vbmap; - int ret; - -- if (!container->bcontainer.dirty_pages_supported && -- !all_device_dirty_tracking) { -+ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { - cpu_physical_memory_set_dirty_range(ram_addr, size, - tcg_enabled() ? DIRTY_CLIENTS_ALL : - DIRTY_CLIENTS_NOCODE); -@@ -1188,10 +1183,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - } - - if (all_device_dirty_tracking) { -- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); -+ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size); - } else { -- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, -- iova, size); -+ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size); - } - - if (ret) { -@@ -1201,8 +1195,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, - vbmap.pages); - -- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, -- ram_addr, dirty_pages); -+ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages); - out: - g_free(vbmap.bitmap); - -@@ -1236,8 +1229,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - - rcu_read_lock(); - if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { -- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, -- translated_addr); -+ ret = 
vfio_get_dirty_bitmap(&container->bcontainer, iova, -+ iotlb->addr_mask + 1, translated_addr); - if (ret) { - error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -@@ -1266,7 +1259,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, - * Sync the whole mapped region (spanning multiple individual mappings) - * in one go. - */ -- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); -+ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, -+ ram_addr); - } - - static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, -@@ -1335,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - ram_addr = memory_region_get_ram_addr(section->mr) + - section->offset_within_region; - -- return vfio_get_dirty_bitmap(container, -+ return vfio_get_dirty_bitmap(&container->bcontainer, - REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr); - } -@@ -1350,7 +1344,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, - return; - } - -- if (vfio_devices_all_dirty_tracking(container)) { -+ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { - ret = vfio_sync_dirty_bitmap(container, section); - if (ret) { - error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 63a906de93..7bd81eab09 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -129,8 +129,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - bool need_dirty_sync = false; - int ret; - -- if (iotlb && vfio_devices_all_running_and_mig_active(container)) { -- if (!vfio_devices_all_device_dirty_tracking(container) && -+ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { -+ if (!vfio_devices_all_device_dirty_tracking(bcontainer) && - container->bcontainer.dirty_pages_supported) { - return 
vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } -@@ -162,7 +162,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - } - - if (need_dirty_sync) { -- ret = vfio_get_dirty_bitmap(container, iova, size, -+ ret = vfio_get_dirty_bitmap(bcontainer, iova, size, - iotlb->translated_addr); - if (ret) { - return ret; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 9f7fedee98..08a1f9dfa4 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" - vfio_legacy_dma_unmap_overflow_workaround(void) "" --vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 -+vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 - vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 - - # platform.c -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 9740cf9fbc..bc67e1316c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -186,7 +186,6 @@ typedef struct VFIODisplay { - - VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); --bool vfio_devices_all_running_and_saving(VFIOContainer *container); - - /* SPAPR specific */ - int vfio_container_add_section_window(VFIOContainer *container, -@@ -260,11 +259,11 @@ 
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); --bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); --bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); --int vfio_devices_query_dirty_bitmap(VFIOContainer *container, -+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); -+bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); -+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size); --int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, -+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch deleted file mode 100644 index 92e9a38..0000000 --- a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch +++ /dev/null @@ -1,97 +0,0 @@ -From a5d19bfbfddb36fa6d68ca6282a5acd9b245d48a Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:41 +0800 -Subject: [PATCH 016/101] vfio/container: Implement attach/detach_device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [15/67] e233c90e4af2061dc0612bc1b1d17be1a47daeae (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 1eb31f13b24c49884d8256f96a6664df2dd0824d) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 16 ++++++++++++++++ - hw/vfio/container.c | 12 +++++------- - 2 files changed, 21 insertions(+), 7 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 8ef2e7967d..483ba82089 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1498,3 +1498,19 @@ retry: - - return info; - } -+ -+int vfio_attach_device(char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) -+{ -+ const VFIOIOMMUOps *ops = &vfio_legacy_ops; -+ -+ return ops->attach_device(name, vbasedev, as, errp); -+} -+ -+void vfio_detach_device(VFIODevice *vbasedev) -+{ -+ if (!vbasedev->bcontainer) { -+ return; -+ } -+ vbasedev->bcontainer->ops->detach_device(vbasedev); -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 721c0d7375..6bacf38222 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -873,8 +873,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - * @name and @vbasedev->name are likely to be different depending - * on the type of the device, hence the need for passing @name - */ --int vfio_attach_device(char *name, VFIODevice *vbasedev, -- AddressSpace *as, Error **errp) -+static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) - { - int groupid = vfio_device_groupid(vbasedev, errp); - VFIODevice *vbasedev_iter; -@@ -914,14 +914,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - return ret; - } - --void vfio_detach_device(VFIODevice *vbasedev) -+static void vfio_legacy_detach_device(VFIODevice *vbasedev) - { - VFIOGroup *group = vbasedev->group; - -- if (!vbasedev->bcontainer) { -- return; -- } -- - QLIST_REMOVE(vbasedev, global_next); - 
QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; -@@ -933,6 +929,8 @@ void vfio_detach_device(VFIODevice *vbasedev) - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -+ .attach_device = vfio_legacy_attach_device, -+ .detach_device = vfio_legacy_detach_device, - .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, - .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, - }; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch deleted file mode 100644 index 42b406b..0000000 --- a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch +++ /dev/null @@ -1,65 +0,0 @@ -From c3c9f366c356032fa57ff7cc664732ba87ceb3fb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:18 +0100 -Subject: [PATCH 051/101] vfio/container: Initialize VFIOIOMMUOps under - vfio_init_container() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [50/67] f325136391b22babadb1be3394c527deecdcd3ca (eauger1/centos-qemu-kvm) - -vfio_init_container() already defines the IOMMU type of the container. -Do the same for the VFIOIOMMUOps struct. This prepares ground for the -following patches that will deduce the associated VFIOIOMMUOps struct -from the IOMMU type. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit bffe92af0e7571868d47a1d1cd2205e13054d492) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index afcfe80488..f4a0434a52 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -370,7 +370,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, - } - - static int vfio_init_container(VFIOContainer *container, int group_fd, -- Error **errp) -+ VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; - -@@ -401,6 +401,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, - } - - container->iommu_type = iommu_type; -+ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); - return 0; - } - -@@ -583,9 +584,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -- ret = vfio_init_container(container, group->fd, errp); -+ ret = vfio_init_container(container, group->fd, space, errp); - if (ret) { - goto free_container_exit; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch deleted file mode 100644 index 3411ecb..0000000 --- a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 29f13011e62f5370ef7fb3248dc85c90ae5bb042 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:21 +0100 -Subject: [PATCH 054/101] vfio/container: Intoduce a new VFIOIOMMUClass::setup - handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [53/67] 8641161afc33d68795bcf51a47e89061b34d50a8 (eauger1/centos-qemu-kvm) - -This will help in converting the sPAPR IOMMU backend to a QOM interface. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 61d893f2cdb34a2b0255f9b5fbba6b49b94ff730) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 1 + - include/hw/vfio/vfio-container-base.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 220e838a91..c22bdd3216 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -1129,6 +1129,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) - { - VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); - -+ vioc->setup = vfio_legacy_setup; - vioc->dma_map = vfio_legacy_dma_map; - vioc->dma_unmap = vfio_legacy_dma_unmap; - vioc->attach_device = vfio_legacy_attach_device; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index c60370fc5e..ce8b1fba88 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -109,6 +109,7 @@ struct VFIOIOMMUClass { - InterfaceClass parent_class; - - /* basic feature */ -+ int (*setup)(VFIOContainerBase *bcontainer, Error **errp); - int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch deleted file mode 100644 index 7139e64..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 5b63e4595e106196ef922b7f762c8f4150d73979 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:19 +0100 -Subject: [PATCH 052/101] vfio/container: Introduce a VFIOIOMMU QOM interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [51/67] 7c06e2165efe94dcd203d44e422a7aa9fac9816c (eauger1/centos-qemu-kvm) - -VFIOContainerBase was not introduced as an abstract QOM object because -it felt unnecessary to expose all the IOMMU backends to the QEMU -machine and human interface. However, we can still abstract the IOMMU -backend handlers using a QOM interface class. This provides more -flexibility when referencing the various implementations. - -Simply transform the VFIOIOMMUOps struct in an InterfaceClass and do -some initial name replacements. Next changes will start converting -VFIOIOMMUOps. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit fdaa774e67435a328c0e28006c4d749f2198294a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 2 +- - hw/vfio/container-base.c | 12 +++++++++++- - hw/vfio/pci.c | 2 +- - include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++---- - 4 files changed, 32 insertions(+), 7 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 08a3e57672..49dab41566 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1503,7 +1503,7 @@ retry: - int vfio_attach_device(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -- const VFIOIOMMUOps *ops = &vfio_legacy_ops; -+ const VFIOIOMMUClass *ops = &vfio_legacy_ops; - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 1ffd25bbfa..913ae49077 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -72,7 +72,7 @@ int 
vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - } - - void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, -- const VFIOIOMMUOps *ops) -+ const VFIOIOMMUClass *ops) - { - bcontainer->ops = ops; - bcontainer->space = space; -@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) - - g_list_free_full(bcontainer->iova_ranges, g_free); - } -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU, -+ .parent = TYPE_INTERFACE, -+ .class_size = sizeof(VFIOIOMMUClass), -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 83c3238608..adb7c09367 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2491,7 +2491,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - { - VFIODevice *vbasedev = &vdev->vbasedev; -- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; -+ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; - - return ops->pci_hot_reset(vbasedev, single); - } -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 5c9594b6c7..d6147b4aee 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -16,7 +16,8 @@ - #include "exec/memory.h" - - typedef struct VFIODevice VFIODevice; --typedef struct VFIOIOMMUOps VFIOIOMMUOps; -+typedef struct VFIOIOMMUClass VFIOIOMMUClass; -+#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ - - typedef struct { - unsigned long *bitmap; -@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace { - * This is the base object for vfio container backends - */ - typedef struct VFIOContainerBase { -- const VFIOIOMMUOps *ops; -+ const VFIOIOMMUClass *ops; - VFIOAddressSpace *space; - MemoryListener listener; - Error *error; -@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - - void vfio_container_init(VFIOContainerBase 
*bcontainer, - VFIOAddressSpace *space, -- const VFIOIOMMUOps *ops); -+ const VFIOIOMMUClass *ops); - void vfio_container_destroy(VFIOContainerBase *bcontainer); - --struct VFIOIOMMUOps { -+ -+#define TYPE_VFIO_IOMMU "vfio-iommu" -+ -+/* -+ * VFIOContainerBase is not an abstract QOM object because it felt -+ * unnecessary to expose all the IOMMU backends to the QEMU machine -+ * and human interface. However, we can still abstract the IOMMU -+ * backend handlers using a QOM interface class. This provides more -+ * flexibility when referencing the various implementations. -+ */ -+DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) -+ -+struct VFIOIOMMUClass { -+ InterfaceClass parent_class; -+ - /* basic feature */ - int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch deleted file mode 100644 index 60439ff..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 58927bf236541b9423f855eca1970f7a3cf864a9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:20 +0100 -Subject: [PATCH 053/101] vfio/container: Introduce a VFIOIOMMU legacy QOM - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [52/67] a81f39d13305e84699313e17ae64d10ff4b09067 (eauger1/centos-qemu-kvm) - -Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM -interface. The set of of operations for this backend can be referenced -with a literal typename instead of a C struct. This will simplify -support of multiple backends. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 9812feefab3a4ff95a6cfd73aecb120b406bc98c) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 6 ++- - hw/vfio/container.c | 58 ++++++++++++++++++++++----- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 1 + - 4 files changed, 55 insertions(+), 11 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 49dab41566..2329d0efc8 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1503,13 +1503,17 @@ retry: - int vfio_attach_device(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -- const VFIOIOMMUClass *ops = &vfio_legacy_ops; -+ const VFIOIOMMUClass *ops = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { - ops = &vfio_iommufd_ops; - } - #endif -+ -+ assert(ops); -+ - return ops->attach_device(name, vbasedev, as, errp); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index f4a0434a52..220e838a91 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -369,10 +369,30 @@ static int vfio_get_iommu_type(VFIOContainer *container, - return -EINVAL; - } - -+/* -+ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type -+ */ -+static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) -+{ -+ ObjectClass *klass = NULL; -+ -+ switch (iommu_type) { -+ case VFIO_TYPE1v2_IOMMU: -+ case VFIO_TYPE1_IOMMU: -+ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); -+ break; -+ default: -+ g_assert_not_reached(); -+ }; -+ -+ return VFIO_IOMMU_CLASS(klass); -+} -+ - static int vfio_init_container(VFIOContainer *container, int group_fd, - VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; -+ const VFIOIOMMUClass *vioc; - - iommu_type = vfio_get_iommu_type(container, errp); - if (iommu_type < 0) { -@@ -401,7 +421,14 @@ static int 
vfio_init_container(VFIOContainer *container, int group_fd, - } - - container->iommu_type = iommu_type; -- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); -+ -+ vioc = vfio_get_iommu_class(iommu_type, errp); -+ if (!vioc) { -+ error_setg(errp, "No available IOMMU models"); -+ return -EINVAL; -+ } -+ -+ vfio_container_init(&container->bcontainer, space, vioc); - return 0; - } - -@@ -1098,12 +1125,25 @@ out_single: - return ret; - } - --const VFIOIOMMUOps vfio_legacy_ops = { -- .dma_map = vfio_legacy_dma_map, -- .dma_unmap = vfio_legacy_dma_unmap, -- .attach_device = vfio_legacy_attach_device, -- .detach_device = vfio_legacy_detach_device, -- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, -- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, -- .pci_hot_reset = vfio_legacy_pci_hot_reset, -+static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->dma_map = vfio_legacy_dma_map; -+ vioc->dma_unmap = vfio_legacy_dma_unmap; -+ vioc->attach_device = vfio_legacy_attach_device; -+ vioc->detach_device = vfio_legacy_detach_device; -+ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; -+ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; -+ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; - }; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_LEGACY, -+ .parent = TYPE_VFIO_IOMMU, -+ .class_init = vfio_iommu_legacy_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b8aa8a5495..14c497b6b0 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; --extern const VFIOIOMMUOps 
vfio_legacy_ops; - extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index d6147b4aee..c60370fc5e 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - - #define TYPE_VFIO_IOMMU "vfio-iommu" -+#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch deleted file mode 100644 index 2840e2c..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch +++ /dev/null @@ -1,71 +0,0 @@ -From e56f961fbe95a53a52c5eca00b4fca17d825e860 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:28 +0800 -Subject: [PATCH 003/101] vfio/container: Introduce a empty VFIOIOMMUOps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [2/67] 0de0afffce42fa4a17f6d33a10b6162cdfbe8150 (eauger1/centos-qemu-kvm) - -This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general -IOMMU ops of legacy container. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit d24668579184f4098779983724ec74cd3db62e10) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 5 +++++ - include/hw/vfio/vfio-common.h | 2 +- - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 242010036a..4bc43ddfa4 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -472,6 +472,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { - VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - int ret, fd; - VFIOAddressSpace *space; - -@@ -552,6 +553,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->iova_ranges = NULL; - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); -+ bcontainer = &container->bcontainer; -+ bcontainer->ops = &vfio_legacy_ops; - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -933,3 +936,5 @@ void vfio_detach_device(VFIODevice *vbasedev) - vfio_put_base_device(vbasedev); - vfio_put_group(group); - } -+ -+const VFIOIOMMUOps vfio_legacy_ops; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 586d153c12..678161f207 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -255,7 +255,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; -- -+extern const VFIOIOMMUOps vfio_legacy_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch deleted file mode 100644 index ae9ccd8..0000000 --- 
a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 6c7546756e979e4f5ba29ae51a21c63fa90492cf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:17 +0100 -Subject: [PATCH 050/101] vfio/container: Introduce vfio_legacy_setup() for - further cleanups -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [49/67] 3a621ba2605c98b7fbf7fd9f93a207f728f1202e (eauger1/centos-qemu-kvm) - -This will help subsequent patches to unify the initialization of type1 -and sPAPR IOMMU backends. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit d3764db87531cd53849ccee9b2f72aede90ccf5b) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 63 +++++++++++++++++++++++++-------------------- - 1 file changed, 35 insertions(+), 28 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 1e77a2929e..afcfe80488 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -474,6 +474,35 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - } - } - -+static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) -+{ -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ g_autofree struct vfio_iommu_type1_info *info = NULL; -+ int ret; -+ -+ ret = vfio_get_iommu_info(container, &info); -+ if (ret) { -+ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); -+ return ret; -+ } -+ -+ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -+ bcontainer->pgsizes = info->iova_pgsizes; -+ } else { -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ } -+ -+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -+ 
bcontainer->dma_max_mappings = 65535; -+ } -+ -+ vfio_get_info_iova_range(info, bcontainer); -+ -+ vfio_get_iommu_info_migration(container, info); -+ return 0; -+} -+ - static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { -@@ -570,40 +599,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: -- { -- struct vfio_iommu_type1_info *info; -- -- ret = vfio_get_iommu_info(container, &info); -- if (ret) { -- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); -- goto enable_discards_exit; -- } -- -- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -- bcontainer->pgsizes = info->iova_pgsizes; -- } else { -- bcontainer->pgsizes = qemu_real_host_page_size(); -- } -- -- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -- bcontainer->dma_max_mappings = 65535; -- } -- -- vfio_get_info_iova_range(info, bcontainer); -- -- vfio_get_iommu_info_migration(container, info); -- g_free(info); -+ ret = vfio_legacy_setup(bcontainer, errp); - break; -- } - case VFIO_SPAPR_TCE_v2_IOMMU: - case VFIO_SPAPR_TCE_IOMMU: -- { - ret = vfio_spapr_container_init(container, errp); -- if (ret) { -- goto enable_discards_exit; -- } - break; -+ default: -+ g_assert_not_reached(); - } -+ -+ if (ret) { -+ goto enable_discards_exit; - } - - vfio_kvm_device_add_group(group); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch deleted file mode 100644 index 3d46a06..0000000 --- a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 6a597d7c82a4538fa1f928db7e600ec2e5a44361 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:39 +0800 -Subject: [PATCH 014/101] vfio/container: Move dirty_pgsizes and - max_dirty_bitmap_size to base container 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [13/67] b9fe57174368e36788b017cc2ad13b748592cfc2 (eauger1/centos-qemu-kvm) - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4d6b95010c59127ac4f7230d6ee88b5d0e99738c) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 9 +++++---- - include/hw/vfio/vfio-common.h | 2 -- - include/hw/vfio/vfio-container-base.h | 2 ++ - 3 files changed, 7 insertions(+), 6 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 5c1dee8c9f..c8088a8174 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -64,6 +64,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_type1_dma_unmap *unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; -@@ -91,7 +92,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - bitmap->size = vbmap.size; - bitmap->data = (__u64 *)vbmap.bitmap; - -- if (vbmap.size > container->max_dirty_bitmap_size) { -+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) { - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); - ret = -E2BIG; - goto unmap_exit; -@@ -131,7 +132,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - - if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { - if (!vfio_devices_all_device_dirty_tracking(bcontainer) && -- container->bcontainer.dirty_pages_supported) { -+ bcontainer->dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, 
size, iotlb); - } - -@@ -469,8 +470,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { - bcontainer->dirty_pages_supported = true; -- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; -- container->dirty_pgsizes = cap_mig->pgsize_bitmap; -+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; -+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; - } - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 922022cbc6..b1c9fe711b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -80,8 +80,6 @@ typedef struct VFIOContainer { - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener prereg_listener; - unsigned iommu_type; -- uint64_t dirty_pgsizes; -- uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - GList *iova_ranges; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 95f8d319e0..80e4a993c5 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase { - MemoryListener listener; - Error *error; - bool initialized; -+ uint64_t dirty_pgsizes; -+ uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; - unsigned int dma_max_mappings; - bool dirty_pages_supported; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch deleted file mode 100644 index c9c79b6..0000000 --- a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 882143ef30da4182f049eb8192e0fac317c372b3 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:40 +0800 -Subject: [PATCH 015/101] vfio/container: Move 
iova_ranges to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [14/67] 49f2e3c484b4c0c63be9aa4eb1bf08804dcb1ec3 (eauger1/centos-qemu-kvm) - -Meanwhile remove the helper function vfio_free_container as it -only calls g_free now. - -No functional change intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit f79baf8c9575ac3193ca86ec508791c86d96b13e) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 5 +++-- - hw/vfio/container-base.c | 3 +++ - hw/vfio/container.c | 19 ++++++------------- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 1 + - 5 files changed, 13 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index be623e544b..8ef2e7967d 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener, - goto fail; - } - -- if (container->iova_ranges) { -+ if (bcontainer->iova_ranges) { - ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, -- container->iova_ranges, &err); -+ bcontainer->iova_ranges, -+ &err); - if (ret) { - g_free(giommu); - goto fail; -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 7f508669f5..0177f43741 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; -+ bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); - } -@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase 
*bcontainer) - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } -+ -+ g_list_free_full(bcontainer->iova_ranges, g_free); - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c8088a8174..721c0d7375 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -308,7 +308,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, - } - - static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, -- VFIOContainer *container) -+ VFIOContainerBase *bcontainer) - { - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_iova_range *cap; -@@ -326,8 +326,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, - - range_set_bounds(range, cap->iova_ranges[i].start, - cap->iova_ranges[i].end); -- container->iova_ranges = -- range_list_insert(container->iova_ranges, range); -+ bcontainer->iova_ranges = -+ range_list_insert(bcontainer->iova_ranges, range); - } - - return true; -@@ -475,12 +475,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - } - } - --static void vfio_free_container(VFIOContainer *container) --{ -- g_list_free_full(container->iova_ranges, g_free); -- g_free(container); --} -- - static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { -@@ -560,7 +554,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - - container = g_malloc0(sizeof(*container)); - container->fd = fd; -- container->iova_ranges = NULL; - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -@@ -597,7 +590,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - bcontainer->dma_max_mappings = 65535; - } - -- vfio_get_info_iova_range(info, container); -+ vfio_get_info_iova_range(info, bcontainer); - - vfio_get_iommu_info_migration(container, info); - g_free(info); -@@ -649,7 +642,7 @@ enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - - 
free_container_exit: -- vfio_free_container(container); -+ g_free(container); - - close_fd_exit: - close(fd); -@@ -693,7 +686,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - - trace_vfio_disconnect_container(container->fd); - close(container->fd); -- vfio_free_container(container); -+ g_free(container); - - vfio_put_address_space(space); - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b1c9fe711b..b9e5a0e64b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -82,7 +82,6 @@ typedef struct VFIOContainer { - unsigned iommu_type; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; -- GList *iova_ranges; - } VFIOContainer; - - typedef struct VFIOHostDMAWindow { -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 80e4a993c5..9658ffb526 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase { - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - QLIST_ENTRY(VFIOContainerBase) next; - QLIST_HEAD(, VFIODevice) device_list; -+ GList *iova_ranges; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch deleted file mode 100644 index 3198bfd..0000000 --- a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch +++ /dev/null @@ -1,522 +0,0 @@ -From 36bc7782bb02f81368e3e43a3947d16ad362e137 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:38 +0800 -Subject: [PATCH 013/101] vfio/container: Move listener to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: 
Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [12/67] f469ab126c6366170aa2520f9b4d9969d3ae0a04 (eauger1/centos-qemu-kvm) - -Move listener to base container. Also error and initialized fields -are moved at the same time. - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit c7b313d300f161c650d011a5c9da469bcd5d34e4) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 110 +++++++++++++------------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 19 +++-- - hw/vfio/spapr.c | 11 +-- - include/hw/vfio/vfio-common.h | 3 - - include/hw/vfio/vfio-container-base.h | 3 + - 6 files changed, 74 insertions(+), 73 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index f15665789f..be623e544b 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, - return true; - } - --static bool vfio_get_section_iova_range(VFIOContainer *container, -+static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, - MemoryRegionSection *section, - hwaddr *out_iova, hwaddr *out_end, - Int128 *out_llend) -@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container, - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener, - return; - } - -- if 
(!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { -+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, -+ &llend)) { - if (memory_region_is_ram_device(section->mr)) { - trace_vfio_listener_region_add_no_dma_map( - memory_region_name(section->mr), -@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener *listener, - } - } - -- ret = vfio_container_dma_map(&container->bcontainer, -- iova, int128_get64(llsize), vaddr, -- section->readonly); -+ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), -+ vaddr, section->readonly); - if (ret) { - error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", -- container, iova, int128_get64(llsize), vaddr, ret, -+ bcontainer, iova, int128_get64(llsize), vaddr, ret, - strerror(-ret)); - if (memory_region_is_ram_device(section->mr)) { - /* Allow unexpected mappings not to be fatal for RAM devices */ -@@ -716,9 +718,9 @@ fail: - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. 
- */ -- if (!container->initialized) { -- if (!container->error) { -- error_propagate_prepend(&container->error, err, -+ if (!bcontainer->initialized) { -+ if (!bcontainer->error) { -+ error_propagate_prepend(&bcontainer->error, err, - "Region %s: ", - memory_region_name(section->mr)); - } else { -@@ -733,8 +735,10 @@ fail: - static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, - */ - } - -- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { -+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, -+ &llend)) { - return; - } - -@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ - llsize = int128_rshift(llsize, 1); -- ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, int128_get64(llsize), ret, -+ bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); - } -- ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, int128_get64(llsize), ret, -+ bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - } -@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges { - } VFIODirtyRanges; - - typedef struct VFIODirtyRangesListener { -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - VFIODirtyRanges ranges; - MemoryListener listener; - } VFIODirtyRangesListener; - - static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, -- VFIOContainer *container) -+ VFIOContainerBase *bcontainer) - { - VFIOPCIDevice *pcidev; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - Object *owner; - -@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, - hwaddr iova, end, *min, *max; - - if (!vfio_listener_valid_section(section, "tracking_update") || -- !vfio_get_section_iova_range(dirty->container, section, -+ !vfio_get_section_iova_range(dirty->bcontainer, section, - &iova, &end, NULL)) { - return; - } -@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, - * The alternative would be an IOVATree but that has a much bigger runtime - * overhead and unnecessary complexity. 
- */ -- if (vfio_section_is_vfio_pci(section, dirty->container) && -+ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && - iova >= UINT32_MAX) { - min = &range->minpci64; - max = &range->maxpci64; -@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = { - .region_add = vfio_dirty_tracking_update, - }; - --static void vfio_dirty_tracking_init(VFIOContainer *container, -+static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer, - VFIODirtyRanges *ranges) - { - VFIODirtyRangesListener dirty; -@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - dirty.ranges.min64 = UINT64_MAX; - dirty.ranges.minpci64 = UINT64_MAX; - dirty.listener = vfio_dirty_tracking_listener; -- dirty.container = container; -+ dirty.bcontainer = bcontainer; - - memory_listener_register(&dirty.listener, -- container->bcontainer.space->as); -+ bcontainer->space->as); - - *ranges = dirty.ranges; - -@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - memory_listener_unregister(&dirty.listener); - } - --static void vfio_devices_dma_logging_stop(VFIOContainer *container) -+static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) - { - uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), - sizeof(uint64_t))] = {}; - struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - feature->argsz = sizeof(buf); -@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) - } - - static struct vfio_device_feature * --vfio_device_feature_dma_logging_start_create(VFIOContainer *container, -+vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, - VFIODirtyRanges *tracking) - { - struct vfio_device_feature *feature; -@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy( - g_free(feature); - } - --static 
int vfio_devices_dma_logging_start(VFIOContainer *container) -+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer) - { - struct vfio_device_feature *feature; - VFIODirtyRanges ranges; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret = 0; - -- vfio_dirty_tracking_init(container, &ranges); -- feature = vfio_device_feature_dma_logging_start_create(container, -+ vfio_dirty_tracking_init(bcontainer, &ranges); -+ feature = vfio_device_feature_dma_logging_start_create(bcontainer, - &ranges); - if (!feature) { - return -errno; -@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) - - out: - if (ret) { -- vfio_devices_dma_logging_stop(container); -+ vfio_devices_dma_logging_stop(bcontainer); - } - - vfio_device_feature_dma_logging_start_destroy(feature); -@@ -1077,14 +1079,14 @@ out: - - static void vfio_listener_log_global_start(MemoryListener *listener) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret; - -- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { -- ret = vfio_devices_dma_logging_start(container); -+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { -+ ret = vfio_devices_dma_logging_start(bcontainer); - } else { -- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -- true); -+ ret = vfio_container_set_dirty_page_tracking(bcontainer, true); - } - - if (ret) { -@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - - static void vfio_listener_log_global_stop(MemoryListener *listener) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret = 0; - -- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { -- 
vfio_devices_dma_logging_stop(container); -+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { -+ vfio_devices_dma_logging_stop(bcontainer); - } else { -- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -- false); -+ ret = vfio_container_set_dirty_page_tracking(bcontainer, false); - } - - if (ret) { -@@ -1214,8 +1216,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - vfio_giommu_dirty_notifier, n); - VFIOGuestIOMMU *giommu = gdn->giommu; - VFIOContainerBase *bcontainer = giommu->bcontainer; -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova = iotlb->iova + giommu->iommu_offset; - ram_addr_t translated_addr; - int ret = -EINVAL; -@@ -1230,12 +1230,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - - rcu_read_lock(); - if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { -- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, -- iotlb->addr_mask + 1, translated_addr); -+ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, -+ translated_addr); - if (ret) { - error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, iotlb->addr_mask + 1, ret, -+ bcontainer, iova, iotlb->addr_mask + 1, ret, - strerror(-ret)); - } - } -@@ -1291,10 +1291,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, - &vrdl); - } - --static int vfio_sync_dirty_bitmap(VFIOContainer *container, -+static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - ram_addr_t ram_addr; - - if (memory_region_is_iommu(section->mr)) { -@@ -1330,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - ram_addr = memory_region_get_ram_addr(section->mr) + - section->offset_within_region; - -- return 
vfio_get_dirty_bitmap(&container->bcontainer, -+ return vfio_get_dirty_bitmap(bcontainer, - REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr); - } -@@ -1338,15 +1337,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - static void vfio_listener_log_sync(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret; - - if (vfio_listener_skipped_section(section)) { - return; - } - -- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { -- ret = vfio_sync_dirty_bitmap(container, section); -+ if (vfio_devices_all_dirty_tracking(bcontainer)) { -+ ret = vfio_sync_dirty_bitmap(bcontainer, section); - if (ret) { - error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, - strerror(-ret)); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 584eee4ba1..7f508669f5 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - { - bcontainer->ops = ops; - bcontainer->space = space; -+ bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 6ba2e2f8c4..5c1dee8c9f 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -453,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - { - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_migration *cap_mig; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - - hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); - if (!hdr) { -@@ -467,7 +468,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer 
*container, - * qemu_real_host_page_size to mark those dirty. - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { -- container->bcontainer.dirty_pages_supported = true; -+ bcontainer->dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; - } -@@ -558,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - - container = g_malloc0(sizeof(*container)); - container->fd = fd; -- container->error = NULL; - container->iova_ranges = NULL; - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); -@@ -621,25 +621,24 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - -- container->listener = vfio_memory_listener; -- -- memory_listener_register(&container->listener, bcontainer->space->as); -+ bcontainer->listener = vfio_memory_listener; -+ memory_listener_register(&bcontainer->listener, bcontainer->space->as); - -- if (container->error) { -+ if (bcontainer->error) { - ret = -1; -- error_propagate_prepend(errp, container->error, -+ error_propagate_prepend(errp, bcontainer->error, - "memory listener initialization failed: "); - goto listener_release_exit; - } - -- container->initialized = true; -+ bcontainer->initialized = true; - - return 0; - listener_release_exit: - QLIST_REMOVE(group, container_next); - QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); -- memory_listener_unregister(&container->listener); -+ memory_listener_unregister(&bcontainer->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || - container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { - vfio_spapr_container_deinit(container); -@@ -674,7 +673,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - * group. 
- */ - if (QLIST_EMPTY(&container->group_list)) { -- memory_listener_unregister(&container->listener); -+ memory_listener_unregister(&bcontainer->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || - container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { - vfio_spapr_container_deinit(container); -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 4f76bdd3ca..7a50975f25 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - { - VFIOContainer *container = container_of(listener, VFIOContainer, - prereg_listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; - int ret; -@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. - */ -- if (!container->initialized) { -- if (!container->error) { -- error_setg_errno(&container->error, -ret, -+ if (!bcontainer->initialized) { -+ if (!bcontainer->error) { -+ error_setg_errno(&bcontainer->error, -ret, - "Memory registering failed"); - } - } else { -@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - - memory_listener_register(&container->prereg_listener, - &address_space_memory); -- if (container->error) { -+ if (bcontainer->error) { - ret = -1; -- error_propagate_prepend(errp, container->error, -+ error_propagate_prepend(errp, bcontainer->error, - "RAM memory listener initialization failed: "); - goto listener_unregister_exit; - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 8a607a4c17..922022cbc6 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -78,11 +78,8 @@ struct VFIOGroup; - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached 
groups */ -- MemoryListener listener; - MemoryListener prereg_listener; - unsigned iommu_type; -- Error *error; -- bool initialized; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 8e05b5ac5a..95f8d319e0 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ MemoryListener listener; -+ Error *error; -+ bool initialized; - unsigned long pgsizes; - unsigned int dma_max_mappings; - bool dirty_pages_supported; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch deleted file mode 100644 index df483e3..0000000 --- a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch +++ /dev/null @@ -1,230 +0,0 @@ -From 0b3fbb6bf5c5bccec184829ff9454fd637c512b9 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:34 +0800 -Subject: [PATCH 009/101] vfio/container: Move per container device list in - base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [8/67] d546cc25f4424b2d42356765c860fdaf4a3ba652 (eauger1/centos-qemu-kvm) - -VFIO Device is also changed to point to base container instead of -legacy container. - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 3e6015d1117579324b456aa169dfca06da9922cf) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 23 +++++++++++++++-------- - hw/vfio/container.c | 12 ++++++------ - include/hw/vfio/vfio-common.h | 3 +-- - include/hw/vfio/vfio-container-base.h | 1 + - 4 files changed, 23 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b1a875ca93..9415395ed9 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) - - bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- return vbasedev->container->bcontainer.space->as != &address_space_memory; -+ return vbasedev->bcontainer->space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) - - static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - return false; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration) { -@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - - bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (!vbasedev->dirty_pages_supported) { - return false; - } -@@ -222,13 +224,14 @@ bool 
vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - */ - bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - if (!migration_is_active(migrate_get_current())) { - return false; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration) { -@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, - VFIOContainer *container) - { - VFIOPCIDevice *pcidev; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - Object *owner; - - owner = memory_region_owner(section->mr); - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { - continue; - } -@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) - uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), - sizeof(uint64_t))] = {}; - struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_SET | - VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (!vbasedev->dirty_tracking) { - continue; - } -@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) - { - struct vfio_device_feature *feature; - VFIODirtyRanges ranges; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret = 0; - -@@ -1046,7 +1052,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainer *container) - return -errno; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->dirty_tracking) { - continue; - } -@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - ret = vfio_device_dma_logging_report(vbasedev, iova, size, - vbmap->bitmap); - if (ret) { -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 3ab74e2615..63a906de93 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -888,7 +888,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - int groupid = vfio_device_groupid(vbasedev, errp); - VFIODevice *vbasedev_iter; - VFIOGroup *group; -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - int ret; - - if (groupid < 0) { -@@ -915,9 +915,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - return ret; - } - -- container = group->container; -- vbasedev->container = container; -- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); -+ bcontainer = &group->container->bcontainer; -+ vbasedev->bcontainer = bcontainer; -+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); - - return ret; -@@ -927,13 +927,13 @@ void vfio_detach_device(VFIODevice *vbasedev) - { - VFIOGroup *group = vbasedev->group; - -- if (!vbasedev->container) { -+ if (!vbasedev->bcontainer) { - return; - } - - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); -- vbasedev->container = NULL; -+ vbasedev->bcontainer = NULL; - 
trace_vfio_detach_device(vbasedev->name, group->groupid); - vfio_put_base_device(vbasedev); - vfio_put_group(group); -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 60f2785fe0..9740cf9fbc 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -90,7 +90,6 @@ typedef struct VFIOContainer { - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -- QLIST_HEAD(, VFIODevice) device_list; - GList *iova_ranges; - } VFIOContainer; - -@@ -118,7 +117,7 @@ typedef struct VFIODevice { - QLIST_ENTRY(VFIODevice) container_next; - QLIST_ENTRY(VFIODevice) global_next; - struct VFIOGroup *group; -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - char *sysfsdev; - char *name; - DeviceState *dev; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index f244f003d0..7090962496 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase { - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; -+ QLIST_HEAD(, VFIODevice) device_list; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch deleted file mode 100644 index 0db20c2..0000000 --- a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch +++ /dev/null @@ -1,242 +0,0 @@ -From d798939fbbe6c27200c165edd6f3771413821b34 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:36 +0800 -Subject: [PATCH 011/101] vfio/container: Move pgsizes and dma_max_mappings to - base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric 
Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [10/67] e80696175aba159a17ce9a869535db66682deb08 (eauger1/centos-qemu-kvm) - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 7ab1cb74ffdbf92ef237243b41bde5c7067d5298) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 17 +++++++++-------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 11 +++++------ - hw/vfio/spapr.c | 10 ++++++---- - include/hw/vfio/vfio-common.h | 2 -- - include/hw/vfio/vfio-container-base.h | 2 ++ - 6 files changed, 23 insertions(+), 20 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index cf6618f6ed..1cb53d369e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - static void vfio_register_ram_discard_listener(VFIOContainer *container, - MemoryRegionSection *section) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl; - -@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - section->mr); - - g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); -- g_assert(container->pgsizes && -- vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); -+ g_assert(bcontainer->pgsizes && -+ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); - - ram_discard_listener_init(&vrdl->listener, - vfio_ram_discard_notify_populate, -@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - * number of sections in the address space we could have over time, - * also consuming DMA mappings. 
- */ -- if (container->dma_max_mappings) { -+ if (bcontainer->dma_max_mappings) { - unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; - - #ifdef CONFIG_KVM -@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - - if (vrdl_mappings + max_memslots - vrdl_count > -- container->dma_max_mappings) { -+ bcontainer->dma_max_mappings) { - warn_report("%s: possibly running out of DMA mappings. E.g., try" - " increasing the 'block-size' of virtio-mem devies." - " Maximum possible DMA mappings: %d, Maximum possible" -- " memslots: %d", __func__, container->dma_max_mappings, -+ " memslots: %d", __func__, bcontainer->dma_max_mappings, - max_memslots); - } - } -@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - iommu_idx); - - ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr, -- container->pgsizes, -+ bcontainer->pgsizes, - &err); - if (ret) { - g_free(giommu); -@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - llsize = int128_sub(llend, int128_make64(iova)); - - if (memory_region_is_ram_device(section->mr)) { -- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; -+ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - - if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { - trace_vfio_listener_region_add_no_dma_map( -@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask; - -- pgmask = (1ULL << ctz64(container->pgsizes)) - 1; -+ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); - } else if (memory_region_has_ram_discard_manager(section->mr)) { - vfio_unregister_ram_discard_listener(container, section); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 5d654ae172..dcce111349 100644 ---- a/hw/vfio/container-base.c -+++ 
b/hw/vfio/container-base.c -@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->ops = ops; - bcontainer->space = space; - bcontainer->dirty_pages_supported = false; -+ bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 7bd81eab09..c5a6262882 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -154,7 +154,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { - trace_vfio_legacy_dma_unmap_overflow_workaround(); -- unmap.size -= 1ULL << ctz64(container->pgsizes); -+ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); - continue; - } - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); -@@ -559,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - container->error = NULL; -- container->dma_max_mappings = 0; - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -@@ -589,13 +588,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - } - - if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -- container->pgsizes = info->iova_pgsizes; -+ bcontainer->pgsizes = info->iova_pgsizes; - } else { -- container->pgsizes = qemu_real_host_page_size(); -+ bcontainer->pgsizes = qemu_real_host_page_size(); - } - -- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { -- container->dma_max_mappings = 65535; -+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -+ bcontainer->dma_max_mappings = 65535; - } - - vfio_get_info_iova_range(info, container); -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 83da2f7ec2..4f76bdd3ca 100644 ---- a/hw/vfio/spapr.c -+++ 
b/hw/vfio/spapr.c -@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container, - hwaddr *pgsize) - { - int ret = 0; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask; - unsigned entries, bits_total, bits_per_level, max_levels; -@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, - if (pagesize > rampagesize) { - pagesize = rampagesize; - } -- pgmask = container->pgsizes & (pagesize | (pagesize - 1)); -+ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1)); - pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0; - if (!pagesize) { - error_report("Host doesn't support page size 0x%"PRIx64 - ", the supported mask is 0x%lx", - memory_region_iommu_get_min_page_size(iommu_mr), -- container->pgsizes); -+ bcontainer->pgsizes); - return -EINVAL; - } - -@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container, - - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; -@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } - - if (v2) { -- container->pgsizes = info.ddw.pgsizes; -+ bcontainer->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. 
- * To make region_add/del simpler, we better remove this -@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } - } else { - /* The default table uses 4K pages */ -- container->pgsizes = 0x1000; -+ bcontainer->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index bc67e1316c..d3dc2f9dcb 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -85,8 +85,6 @@ typedef struct VFIOContainer { - bool initialized; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; -- unsigned long pgsizes; -- unsigned int dma_max_mappings; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 7090962496..85ec7e1a56 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ unsigned long pgsizes; -+ unsigned int dma_max_mappings; - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch deleted file mode 100644 index edd4538..0000000 --- a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch +++ /dev/null @@ -1,265 +0,0 @@ -From 3ba43cbc5b096feed6272e070cf152d5fc74df01 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:32 +0800 -Subject: [PATCH 007/101] vfio/container: Move space field to base container -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [6/67] b0aa17d9ec4588bd64373452a30306e826234d0b (eauger1/centos-qemu-kvm) - -Move the space field to the base object. Also the VFIOAddressSpace -now contains a list of base containers. - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit e5597063386a0c76308ad16da31726d23f489945) -Signed-off-by: Eric Auger ---- - hw/ppc/spapr_pci_vfio.c | 10 +++++----- - hw/vfio/common.c | 4 ++-- - hw/vfio/container-base.c | 6 +++++- - hw/vfio/container.c | 18 ++++++++---------- - include/hw/vfio/vfio-common.h | 8 -------- - include/hw/vfio/vfio-container-base.h | 9 +++++++++ - 6 files changed, 29 insertions(+), 26 deletions(-) - -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index f283f7e38d..d1d07bec46 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) - static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) - { - VFIOAddressSpace *space = vfio_get_address_space(as); -- VFIOContainer *container = NULL; -+ VFIOContainerBase *bcontainer = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - -- container = QLIST_FIRST(&space->containers); -+ bcontainer = QLIST_FIRST(&space->containers); - -- if (QLIST_NEXT(container, next)) { -+ if (QLIST_NEXT(bcontainer, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ -- container = NULL; -+ bcontainer = NULL; - goto out; - } - - out: - vfio_put_address_space(space); -- return container; -+ return 
container_of(bcontainer, VFIOContainer, bcontainer); - } - - static bool vfio_eeh_as_ok(AddressSpace *as) -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 43580bcc43..1d8202537e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) - - bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- return vbasedev->container->space->as != &address_space_memory; -+ return vbasedev->container->bcontainer.space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - dirty.container = container; - - memory_listener_register(&dirty.listener, -- container->space->as); -+ container->bcontainer.space->as); - - *ranges = dirty.ranges; - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 20bcb9669a..3933391e0d 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - --void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) -+void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, -+ const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; -+ bcontainer->space = space; - QLIST_INIT(&bcontainer->giommu_list); - } - -@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) - { - VFIOGuestIOMMU *giommu, *tmp; - -+ QLIST_REMOVE(bcontainer, next); -+ - QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { - memory_region_unregister_iommu_notifier( - MEMORY_REGION(giommu->iommu_mr), &giommu->n); -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 133d3c8f5c..f12fcb6fe1 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -514,7 +514,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - 
* details once we know which type of IOMMU we are using. - */ - -- QLIST_FOREACH(container, &space->containers, next) { -+ QLIST_FOREACH(bcontainer, &space->containers, next) { -+ container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { -@@ -550,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - } - - container = g_malloc0(sizeof(*container)); -- container->space = space; - container->fd = fd; - container->error = NULL; - container->dirty_pages_supported = false; -@@ -558,7 +558,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, &vfio_legacy_ops); -+ vfio_container_init(bcontainer, space, &vfio_legacy_ops); - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -613,14 +613,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - vfio_kvm_device_add_group(group); - - QLIST_INIT(&container->group_list); -- QLIST_INSERT_HEAD(&space->containers, container, next); -+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); - - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - - container->listener = vfio_memory_listener; - -- memory_listener_register(&container->listener, container->space->as); -+ memory_listener_register(&container->listener, bcontainer->space->as); - - if (container->error) { - ret = -1; -@@ -634,7 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - return 0; - listener_release_exit: - QLIST_REMOVE(group, container_next); -- QLIST_REMOVE(container, next); -+ QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); - memory_listener_unregister(&container->listener); - if 
(container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -@@ -684,9 +684,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - } - - if (QLIST_EMPTY(&container->group_list)) { -- VFIOAddressSpace *space = container->space; -- -- QLIST_REMOVE(container, next); -+ VFIOAddressSpace *space = bcontainer->space; - - vfio_container_destroy(bcontainer); - -@@ -707,7 +705,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? */ -- if (group->container->space->as == as) { -+ if (group->container->bcontainer.space->as == as) { - return group; - } else { - error_setg(errp, "group %d used in multiple address spaces", -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6be082b8f2..bd4de6cb3a 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -73,17 +73,10 @@ typedef struct VFIOMigration { - bool initial_data_sent; - } VFIOMigration; - --typedef struct VFIOAddressSpace { -- AddressSpace *as; -- QLIST_HEAD(, VFIOContainer) containers; -- QLIST_ENTRY(VFIOAddressSpace) list; --} VFIOAddressSpace; -- - struct VFIOGroup; - - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; -- VFIOAddressSpace *space; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener listener; - MemoryListener prereg_listener; -@@ -98,7 +91,6 @@ typedef struct VFIOContainer { - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -- QLIST_ENTRY(VFIOContainer) next; - QLIST_HEAD(, VFIODevice) device_list; - GList *iova_ranges; - } VFIOContainer; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index a11aec5755..c7cc6ec9c5 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h 
-@@ -24,12 +24,20 @@ typedef struct { - hwaddr pages; - } VFIOBitmap; - -+typedef struct VFIOAddressSpace { -+ AddressSpace *as; -+ QLIST_HEAD(, VFIOContainerBase) containers; -+ QLIST_ENTRY(VFIOAddressSpace) list; -+} VFIOAddressSpace; -+ - /* - * This is the base object for vfio container backends - */ - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; -+ VFIOAddressSpace *space; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; -+ QLIST_ENTRY(VFIOContainerBase) next; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { -@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - IOMMUTLBEntry *iotlb); - - void vfio_container_init(VFIOContainerBase *bcontainer, -+ VFIOAddressSpace *space, - const VFIOIOMMUOps *ops); - void vfio_container_destroy(VFIOContainerBase *bcontainer); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch deleted file mode 100644 index 5e31d07..0000000 --- a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch +++ /dev/null @@ -1,255 +0,0 @@ -From aadd055dcc06cb964ebfd2868b7e9b207d62ae0e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:37 +0800 -Subject: [PATCH 012/101] vfio/container: Move vrdl_list to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [11/67] 42da5389e39291839259f0e4c020c7461b7225cc (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dc74a4b0056c0c803d46612a2319294921097974) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 38 +++++++++++++-------------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 1 - - include/hw/vfio/vfio-common.h | 11 -------- - include/hw/vfio/vfio-container-base.h | 11 ++++++++ - 5 files changed, 31 insertions(+), 31 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 1cb53d369e..f15665789f 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, - { - VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, - listener); -+ VFIOContainerBase *bcontainer = vrdl->bcontainer; - const hwaddr size = int128_get64(section->size); - const hwaddr iova = section->offset_within_address_space; - int ret; - - /* Unmap with a single call. */ -- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, -- iova, size , NULL); -+ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); - if (ret) { - error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, - strerror(-ret)); -@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - { - VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, - listener); -+ VFIOContainerBase *bcontainer = vrdl->bcontainer; - const hwaddr end = section->offset_within_region + - int128_get64(section->size); - hwaddr start, next, iova; -@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - section->offset_within_address_space; - vaddr = memory_region_get_ram_ptr(section->mr) + start; - -- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, -- next - start, vaddr, section->readonly); -+ ret = vfio_container_dma_map(bcontainer, iova, next - start, -+ vaddr, section->readonly); - if 
(ret) { - /* Rollback */ - vfio_ram_discard_notify_discard(rdl, section); -@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - return 0; - } - --static void vfio_register_ram_discard_listener(VFIOContainer *container, -+static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl; - -@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); - - vrdl = g_new0(VFIORamDiscardListener, 1); -- vrdl->container = container; -+ vrdl->bcontainer = bcontainer; - vrdl->mr = section->mr; - vrdl->offset_within_address_space = section->offset_within_address_space; - vrdl->size = int128_get64(section->size); -@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - vfio_ram_discard_notify_populate, - vfio_ram_discard_notify_discard, true); - ram_discard_manager_register_listener(rdm, &vrdl->listener, section); -- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); -+ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); - - /* - * Sanity-check if we have a theoretically problematic setup where we could -@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - #endif - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - hwaddr start, end; - - start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, -@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - } - --static void vfio_unregister_ram_discard_listener(VFIOContainer *container, -+static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, - 
MemoryRegionSection *section) - { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { -@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - * about changes. - */ - if (memory_region_has_ram_discard_manager(section->mr)) { -- vfio_register_ram_discard_listener(container, section); -+ vfio_register_ram_discard_listener(bcontainer, section); - return; - } - -@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); - } else if (memory_region_has_ram_discard_manager(section->mr)) { -- vfio_unregister_ram_discard_listener(container, section); -+ vfio_unregister_ram_discard_listener(bcontainer, section); - /* Unregistering will trigger an unmap. */ - try_unmap = false; - } -@@ -1260,17 +1260,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, - * Sync the whole mapped region (spanning multiple individual mappings) - * in one go. 
- */ -- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, -- ram_addr); -+ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr); - } - --static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, -- MemoryRegionSection *section) -+static int -+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) - { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { -@@ -1324,7 +1324,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - } - return 0; - } else if (memory_region_has_ram_discard_manager(section->mr)) { -- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); -+ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section); - } - - ram_addr = memory_region_get_ram_addr(section->mr) + -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index dcce111349..584eee4ba1 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); -+ QLIST_INIT(&bcontainer->vrdl_list); - } - - void vfio_container_destroy(VFIOContainerBase *bcontainer) -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c5a6262882..6ba2e2f8c4 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -560,7 +560,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->fd = fd; - container->error = NULL; - container->iova_ranges = NULL; -- QLIST_INIT(&container->vrdl_list); - 
bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index d3dc2f9dcb..8a607a4c17 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -87,20 +87,9 @@ typedef struct VFIOContainer { - uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; -- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - GList *iova_ranges; - } VFIOContainer; - --typedef struct VFIORamDiscardListener { -- VFIOContainer *container; -- MemoryRegion *mr; -- hwaddr offset_within_address_space; -- hwaddr size; -- uint64_t granularity; -- RamDiscardListener listener; -- QLIST_ENTRY(VFIORamDiscardListener) next; --} VFIORamDiscardListener; -- - typedef struct VFIOHostDMAWindow { - hwaddr min_iova; - hwaddr max_iova; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 85ec7e1a56..8e05b5ac5a 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase { - unsigned int dma_max_mappings; - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; -+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - QLIST_ENTRY(VFIOContainerBase) next; - QLIST_HEAD(, VFIODevice) device_list; - } VFIOContainerBase; -@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU { - QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; - } VFIOGuestIOMMU; - -+typedef struct VFIORamDiscardListener { -+ VFIOContainerBase *bcontainer; -+ MemoryRegion *mr; -+ hwaddr offset_within_address_space; -+ hwaddr size; -+ uint64_t granularity; -+ RamDiscardListener listener; -+ QLIST_ENTRY(VFIORamDiscardListener) next; -+} VFIORamDiscardListener; -+ - int vfio_container_dma_map(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch deleted file mode 100644 index f68be0b..0000000 --- a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch +++ /dev/null @@ -1,66 +0,0 @@ -From edfc1ee2a1854d180ffad92e70212535a2ca668c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 21 Dec 2023 10:45:17 +0800 -Subject: [PATCH 062/101] vfio/container: Rename vfio_init_container to - vfio_set_iommu -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [61/67] 5e7f956379b54fe6fa7e078ec17e71325aa109af (eauger1/centos-qemu-kvm) - -vfio_container_init() and vfio_init_container() names are confusing -especially when we see vfio_init_container() calls vfio_container_init(). - -vfio_container_init() operates on base container which is consistent -with all routines handling 'VFIOContainerBase *' ops. - -vfio_init_container() operates on legacy container and setup IOMMU -context with ioctl(VFIO_SET_IOMMU). - -So choose to rename vfio_init_container to vfio_set_iommu to avoid -the confusion. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -(cherry picked from commit 9f734a117cbf63b03577b46c8cad8ad88ec6dced) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 8d334f52f2..bd25b9fbad 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -392,8 +392,8 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) - return VFIO_IOMMU_CLASS(klass); - } - --static int vfio_init_container(VFIOContainer *container, int group_fd, -- VFIOAddressSpace *space, Error **errp) -+static int vfio_set_iommu(VFIOContainer *container, int group_fd, -+ VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; - const VFIOIOMMUClass *vioc; -@@ -616,7 +616,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->fd = fd; - bcontainer = &container->bcontainer; - -- ret = vfio_init_container(container, group->fd, space, errp); -+ ret = vfio_set_iommu(container, group->fd, space, errp); - if (ret) { - goto free_container_exit; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch deleted file mode 100644 index 77df179..0000000 --- a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 8d3857c7877da58ed0c6b62cf2714c4127350522 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 20 Dec 2023 14:53:02 +0100 -Subject: [PATCH 059/101] vfio/container: Replace basename with - g_path_get_basename -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian 
Ott -RH-Commit: [58/67] 56a90f23dadc89271b1fff014fc64ade87c1a4cb (eauger1/centos-qemu-kvm) - -g_path_get_basename() is a portable utility function that has the -advantage of not modifing the string argument. It also fixes a compile -breakage with the Musl C library reported in [1]. - -[1] https://lore.kernel.org/all/20231212010228.2701544-1-raj.khem@gmail.com/ - -Reported-by: Khem Raj -Reviewed-by: Eric Auger -Reviewed-by: Zhao Liu -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 213ae3ffda463c0503e39e0cf827511b5298c314) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 688cf23bab..8d334f52f2 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -869,7 +869,8 @@ static void vfio_put_base_device(VFIODevice *vbasedev) - - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - { -- char *tmp, group_path[PATH_MAX], *group_name; -+ char *tmp, group_path[PATH_MAX]; -+ g_autofree char *group_name = NULL; - int ret, groupid; - ssize_t len; - -@@ -885,7 +886,7 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - - group_path[len] = 0; - -- group_name = basename(group_path); -+ group_name = g_path_get_basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - return -errno; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch deleted file mode 100644 index 5442688..0000000 --- a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch +++ /dev/null @@ -1,235 +0,0 @@ -From a2c8aa64b1b21a3e1d4cf2a4fe7d84dc32f69284 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:33 +0800 -Subject: [PATCH 008/101] vfio/container: Switch to IOMMU BE - 
set_dirty_page_tracking/query_dirty_bitmap API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [7/67] 88368809c7990e1d9b01406e48694fe3e3fb1397 (eauger1/centos-qemu-kvm) - -dirty_pages_supported field is also moved to the base container - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit bb424490edcef73d07f200d53f69415b203d81df) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 12 ++++++++---- - hw/vfio/container-base.c | 16 ++++++++++++++++ - hw/vfio/container.c | 21 ++++++++++++++------- - include/hw/vfio/vfio-common.h | 6 ------ - include/hw/vfio/vfio-container-base.h | 6 ++++++ - 5 files changed, 44 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 1d8202537e..b1a875ca93 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - if (vfio_devices_all_device_dirty_tracking(container)) { - ret = vfio_devices_dma_logging_start(container); - } else { -- ret = vfio_set_dirty_page_tracking(container, true); -+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -+ true); - } - - if (ret) { -@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) - if (vfio_devices_all_device_dirty_tracking(container)) { - vfio_devices_dma_logging_stop(container); - } else { -- ret = vfio_set_dirty_page_tracking(container, false); -+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -+ false); - } - - if (ret) { -@@ -1165,7 +1167,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t 
iova, - VFIOBitmap vbmap; - int ret; - -- if (!container->dirty_pages_supported && !all_device_dirty_tracking) { -+ if (!container->bcontainer.dirty_pages_supported && -+ !all_device_dirty_tracking) { - cpu_physical_memory_set_dirty_range(ram_addr, size, - tcg_enabled() ? DIRTY_CLIENTS_ALL : - DIRTY_CLIENTS_NOCODE); -@@ -1180,7 +1183,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - if (all_device_dirty_tracking) { - ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); - } else { -- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); -+ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, -+ iova, size); - } - - if (ret) { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 3933391e0d..5d654ae172 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - -+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start) -+{ -+ g_assert(bcontainer->ops->set_dirty_page_tracking); -+ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); -+} -+ -+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size) -+{ -+ g_assert(bcontainer->ops->query_dirty_bitmap); -+ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size); -+} -+ - void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; - bcontainer->space = space; -+ bcontainer->dirty_pages_supported = false; - QLIST_INIT(&bcontainer->giommu_list); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index f12fcb6fe1..3ab74e2615 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -131,7 +131,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase 
*bcontainer, hwaddr iova, - - if (iotlb && vfio_devices_all_running_and_mig_active(container)) { - if (!vfio_devices_all_device_dirty_tracking(container) && -- container->dirty_pages_supported) { -+ container->bcontainer.dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } - -@@ -205,14 +205,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, - return -errno; - } - --int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) -+static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), - }; - -- if (!container->dirty_pages_supported) { -+ if (!bcontainer->dirty_pages_supported) { - return 0; - } - -@@ -232,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) - return ret; - } - --int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, -- hwaddr iova, hwaddr size) -+static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; -@@ -461,7 +467,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - * qemu_real_host_page_size to mark those dirty. 
- */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { -- container->dirty_pages_supported = true; -+ container->bcontainer.dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; - } -@@ -553,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - container->error = NULL; -- container->dirty_pages_supported = false; - container->dma_max_mappings = 0; - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); -@@ -937,4 +942,6 @@ void vfio_detach_device(VFIODevice *vbasedev) - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -+ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, -+ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, - }; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index bd4de6cb3a..60f2785fe0 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -83,7 +83,6 @@ typedef struct VFIOContainer { - unsigned iommu_type; - Error *error; - bool initialized; -- bool dirty_pages_supported; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; -@@ -190,11 +189,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); - --/* container->fd */ --int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); --int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, -- hwaddr iova, hwaddr size); -- - /* SPAPR specific */ - int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 
c7cc6ec9c5..f244f003d0 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; - } VFIOContainerBase; -@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, - int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); -+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start); -+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size); - - void vfio_container_init(VFIOContainerBase *bcontainer, - VFIOAddressSpace *space, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch deleted file mode 100644 index cfb5eb1..0000000 --- a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch +++ /dev/null @@ -1,303 +0,0 @@ -From 00daef8e3f4f64b1401b2e8945c256d27fbfa960 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:29 +0800 -Subject: [PATCH 004/101] vfio/container: Switch to dma_map|unmap API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [3/67] 9a20e2f2b277be65463f145df3309271493be6ac (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit b08501a999e2448f500a46d68da503be55186b04) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 45 +++++++++++++++------------ - hw/vfio/container-base.c | 32 +++++++++++++++++++ - hw/vfio/container.c | 22 ++++++++----- - hw/vfio/meson.build | 1 + - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 4 --- - include/hw/vfio/vfio-container-base.h | 7 +++++ - 7 files changed, 81 insertions(+), 32 deletions(-) - create mode 100644 hw/vfio/container-base.c - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index e70fdf5e0c..e610771888 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -- VFIOContainer *container = giommu->container; -+ VFIOContainerBase *bcontainer = &giommu->container->bcontainer; - hwaddr iova = iotlb->iova + giommu->iommu_offset; - void *vaddr; - int ret; -@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - * of vaddr will always be there, even if the memory object is - * destroyed and its backing memory munmap-ed. 
- */ -- ret = vfio_dma_map(container, iova, -- iotlb->addr_mask + 1, vaddr, -- read_only); -+ ret = vfio_container_dma_map(bcontainer, iova, -+ iotlb->addr_mask + 1, vaddr, -+ read_only); - if (ret) { -- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", -- container, iova, -+ bcontainer, iova, - iotlb->addr_mask + 1, vaddr, ret, strerror(-ret)); - } - } else { -- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); -+ ret = vfio_container_dma_unmap(bcontainer, iova, -+ iotlb->addr_mask + 1, iotlb); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, -+ bcontainer, iova, - iotlb->addr_mask + 1, ret, strerror(-ret)); - vfio_set_migration_error(ret); - } -@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, - int ret; - - /* Unmap with a single call. 
*/ -- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); -+ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, -+ iova, size , NULL); - if (ret) { -- error_report("%s: vfio_dma_unmap() failed: %s", __func__, -+ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, - strerror(-ret)); - } - } -@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - section->offset_within_address_space; - vaddr = memory_region_get_ram_ptr(section->mr) + start; - -- ret = vfio_dma_map(vrdl->container, iova, next - start, -- vaddr, section->readonly); -+ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, -+ next - start, vaddr, section->readonly); - if (ret) { - /* Rollback */ - vfio_ram_discard_notify_discard(rdl, section); -@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener, - } - } - -- ret = vfio_dma_map(container, iova, int128_get64(llsize), -- vaddr, section->readonly); -+ ret = vfio_container_dma_map(&container->bcontainer, -+ iova, int128_get64(llsize), vaddr, -+ section->readonly); - if (ret) { -- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -+ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", - container, iova, int128_get64(llsize), vaddr, ret, - strerror(-ret)); -@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ - llsize = int128_rshift(llsize, 1); -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ int128_get64(llsize), NULL); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - container, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); - } -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ int128_get64(llsize), NULL); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - container, iova, int128_get64(llsize), ret, - strerror(-ret)); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -new file mode 100644 -index 0000000000..55d3a35fa4 ---- /dev/null -+++ b/hw/vfio/container-base.c -@@ -0,0 +1,32 @@ -+/* -+ * VFIO BASE CONTAINER -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "qemu/error-report.h" -+#include "hw/vfio/vfio-container-base.h" -+ -+int vfio_container_dma_map(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly) -+{ -+ g_assert(bcontainer->ops->dma_map); -+ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); -+} -+ -+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) -+{ -+ g_assert(bcontainer->ops->dma_unmap); -+ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 4bc43ddfa4..c04df26323 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -115,9 +115,11 @@ unmap_exit: - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ --int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb) -+static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, IOMMUTLBEntry *iotlb) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, -@@ -151,7 +153,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, - */ - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { -- trace_vfio_dma_unmap_overflow_workaround(); -+ trace_vfio_legacy_dma_unmap_overflow_workaround(); - unmap.size -= 1ULL << ctz64(container->pgsizes); - continue; - } -@@ -170,9 +172,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, - return 0; - } - --int vfio_dma_map(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, void *vaddr, bool readonly) -+static int 
vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, -@@ -191,7 +195,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, - * the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || -- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && -+ (errno == EBUSY && -+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } -@@ -937,4 +942,7 @@ void vfio_detach_device(VFIODevice *vbasedev) - vfio_put_group(group); - } - --const VFIOIOMMUOps vfio_legacy_ops; -+const VFIOIOMMUOps vfio_legacy_ops = { -+ .dma_map = vfio_legacy_dma_map, -+ .dma_unmap = vfio_legacy_dma_unmap, -+}; -diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build -index 2a6912c940..eb6ce6229d 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -2,6 +2,7 @@ vfio_ss = ss.source_set() - vfio_ss.add(files( - 'helpers.c', - 'common.c', -+ 'container-base.c', - 'container.c', - 'spapr.c', - 'migration.c', -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 0eb2387cf2..9f7fedee98 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re - vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" --vfio_dma_unmap_overflow_workaround(void) "" -+vfio_legacy_dma_unmap_overflow_workaround(void) "" - vfio_get_dirty_bitmap(int fd, 
uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 - vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 678161f207..24a26345e5 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -208,10 +208,6 @@ void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); - - /* container->fd */ --int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb); --int vfio_dma_map(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, void *vaddr, bool readonly); - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); - int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 1d6daaea5d..56b033f59f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - } VFIOContainerBase; - -+int vfio_container_dma_map(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly); -+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb); -+ - struct VFIOIOMMUOps { - /* basic feature */ - int (*dma_map)(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch deleted file mode 100644 index 52e3d87..0000000 --- 
a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 49435d4d592bc890f56b69c2290f890c87b5a103 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:05 +0800 -Subject: [PATCH 026/101] vfio/iommufd: Add support for iova_ranges and pgsizes -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [25/67] 578af0547d97276ccd4936b574c12118fc70d468 (eauger1/centos-qemu-kvm) - -Some vIOMMU such as virtio-iommu use IOVA ranges from host side to -setup reserved ranges for passthrough device, so that guest will not -use an IOVA range beyond host support. - -Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to -vIOMMU just like the legacy backend, if this fails, fallback to -64bit IOVA range. - -Also use out_iova_alignment returned from uAPI as pgsizes instead of -qemu_real_host_page_size() as a fallback. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 714e9affa8ae1d84007c8afde7bb10fef9cb883d) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 55 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 6d31aeac7b..01b448e840 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) - return ram_block_uncoordinated_discard_disable(state); - } - -+static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, -+ uint32_t ioas_id, Error **errp) -+{ -+ VFIOContainerBase *bcontainer = &container->bcontainer; -+ struct iommu_ioas_iova_ranges *info; -+ struct iommu_iova_range *iova_ranges; -+ int ret, sz, fd = container->be->fd; -+ -+ info = g_malloc0(sizeof(*info)); -+ info->size = sizeof(*info); -+ info->ioas_id = ioas_id; -+ -+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); -+ if (ret && errno != EMSGSIZE) { -+ goto error; -+ } -+ -+ sz = info->num_iovas * sizeof(struct iommu_iova_range); -+ info = g_realloc(info, sizeof(*info) + sz); -+ info->allowed_iovas = (uintptr_t)(info + 1); -+ -+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); -+ if (ret) { -+ goto error; -+ } -+ -+ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; -+ -+ for (int i = 0; i < info->num_iovas; i++) { -+ Range *range = g_new(Range, 1); -+ -+ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); -+ bcontainer->iova_ranges = -+ range_list_insert(bcontainer->iova_ranges, range); -+ } -+ bcontainer->pgsizes = info->out_iova_alignment; -+ -+ g_free(info); -+ return 0; -+ -+error: -+ ret = -errno; -+ g_free(info); -+ error_setg_errno(errp, errno, "Cannot get IOVA ranges"); -+ return ret; -+} -+ - static int 
iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - goto err_discard_disable; - } - -- bcontainer->pgsizes = qemu_real_host_page_size(); -+ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); -+ if (ret) { -+ error_append_hint(&err, -+ "Fallback to default 64bit IOVA range and 4K page size\n"); -+ warn_report_err(err); -+ err = NULL; -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ } - - bcontainer->listener = vfio_memory_listener; - memory_listener_register(&bcontainer->listener, bcontainer->space->as); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch deleted file mode 100644 index 48db196..0000000 --- a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch +++ /dev/null @@ -1,215 +0,0 @@ -From e94700896dd8fcea149d9719eccde6f485440be2 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:08 +0800 -Subject: [PATCH 029/101] vfio/iommufd: Enable pci hot reset through iommufd - cdev interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [28/67] ca1ae970138ee4a6f4b3b49817e775f3159f4c97 (eauger1/centos-qemu-kvm) - -Implement the newly introduced pci_hot_reset callback named -iommufd_cdev_pci_hot_reset to do iommufd specific check and -reset operation. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 96d6f85ff012abd7aaa35b1a2bc48b8640c898d9) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 1 + - 2 files changed, 151 insertions(+) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 01b448e840..6e53e013ef 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -24,6 +24,7 @@ - #include "sysemu/reset.h" - #include "qemu/cutils.h" - #include "qemu/chardev_open.h" -+#include "pci.h" - - static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) -@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) - close(vbasedev->fd); - } - -+static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) -+{ -+ VFIODevice *vbasedev_iter; -+ -+ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { -+ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { -+ continue; -+ } -+ if (devid == vbasedev_iter->devid) { -+ return vbasedev_iter; -+ } -+ } -+ return NULL; -+} -+ -+static VFIOPCIDevice * -+iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, -+ VFIODevice *reset_dev) -+{ -+ VFIODevice *vbasedev_tmp; -+ -+ if (dep_dev->devid == reset_dev->devid || -+ dep_dev->devid == VFIO_PCI_DEVID_OWNED) { -+ return NULL; -+ } -+ -+ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); -+ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || -+ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { -+ return NULL; -+ } -+ -+ return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev); -+} -+ -+static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct 
vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int ret, i; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { -+ goto out_single; -+ } -+ -+ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); -+ -+ devices = &info->devices[0]; -+ -+ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { -+ if (!vdev->has_pm_reset) { -+ for (i = 0; i < info->count; i++) { -+ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { -+ error_report("vfio: Cannot reset device %s, " -+ "depends on device %04x:%02x:%02x.%x " -+ "which is not owned.", -+ vdev->vbasedev.name, devices[i].segment, -+ devices[i].bus, PCI_SLOT(devices[i].devfn), -+ PCI_FUNC(devices[i].devfn)); -+ } -+ } -+ } -+ ret = -EPERM; -+ goto out_single; -+ } -+ -+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -+ -+ for (i = 0; i < info->count; i++) { -+ VFIOPCIDevice *tmp; -+ -+ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, -+ devices[i].bus, -+ PCI_SLOT(devices[i].devfn), -+ PCI_FUNC(devices[i].devfn), -+ devices[i].devid); -+ -+ /* -+ * If a VFIO cdev device is resettable, all the dependent devices -+ * are either bound to same iommufd or within same iommu_groups as -+ * one of the iommufd bound devices. 
-+ */ -+ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); -+ -+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); -+ if (!tmp) { -+ continue; -+ } -+ -+ if (single) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ vfio_pci_pre_reset(tmp); -+ tmp->vbasedev.needs_reset = false; -+ multi = true; -+ } -+ -+ if (!single && !multi) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ -+ /* Use zero length array for hot reset with iommufd backend */ -+ reset = g_malloc0(sizeof(*reset)); -+ reset->argsz = sizeof(*reset); -+ -+ /* Bus reset! */ -+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -+ g_free(reset); -+ if (ret) { -+ ret = -errno; -+ } -+ -+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -+ ret ? strerror(errno) : "Success"); -+ -+ /* Re-enable INTx on affected devices */ -+ for (i = 0; i < info->count; i++) { -+ VFIOPCIDevice *tmp; -+ -+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); -+ if (!tmp) { -+ continue; -+ } -+ vfio_pci_post_reset(tmp); -+ } -+out_single: -+ if (!single) { -+ vfio_pci_post_reset(vdev); -+ } -+ g_free(info); -+ -+ return ret; -+} -+ - const VFIOIOMMUOps vfio_iommufd_ops = { - .dma_map = iommufd_cdev_map, - .dma_unmap = iommufd_cdev_unmap, - .attach_device = iommufd_cdev_attach, - .detach_device = iommufd_cdev_detach, -+ .pci_hot_reset = iommufd_cdev_pci_hot_reset, - }; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 3340c93af0..8fdde54456 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ - iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" - iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" - iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" 
-+iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch deleted file mode 100644 index f00cbcd..0000000 --- a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch +++ /dev/null @@ -1,561 +0,0 @@ -From f018d0b686406256c2b5e823e4227316ee1394e9 Mon Sep 17 00:00:00 2001 -From: Yi Liu -Date: Tue, 21 Nov 2023 16:44:03 +0800 -Subject: [PATCH 024/101] vfio/iommufd: Implement the iommufd backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [23/67] d11046654117a690542a1e2b48b9d1994f778b2d (eauger1/centos-qemu-kvm) - -The iommufd backend is implemented based on the new /dev/iommu user API. -This backend obviously depends on CONFIG_IOMMUFD. - -So far, the iommufd backend doesn't support dirty page sync yet. 
- -Co-authored-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 5ee3dc7af7859e7b8aa34c10c21778101c15e812) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 6 + - hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++ - hw/vfio/meson.build | 3 + - hw/vfio/trace-events | 10 + - include/hw/vfio/vfio-common.h | 11 + - 5 files changed, 452 insertions(+) - create mode 100644 hw/vfio/iommufd.c - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 934f4f5446..6569732b7a 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #ifdef CONFIG_KVM - #include -@@ -1503,6 +1504,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - { - const VFIOIOMMUOps *ops = &vfio_legacy_ops; - -+#ifdef CONFIG_IOMMUFD -+ if (vbasedev->iommufd) { -+ ops = &vfio_iommufd_ops; -+ } -+#endif - return ops->attach_device(name, vbasedev, as, errp); - } - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -new file mode 100644 -index 0000000000..6d31aeac7b ---- /dev/null -+++ b/hw/vfio/iommufd.c -@@ -0,0 +1,422 @@ -+/* -+ * iommufd container backend -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include -+#include -+#include -+ -+#include "hw/vfio/vfio-common.h" -+#include "qemu/error-report.h" -+#include "trace.h" -+#include "qapi/error.h" -+#include "sysemu/iommufd.h" -+#include "hw/qdev-core.h" -+#include "sysemu/reset.h" -+#include "qemu/cutils.h" -+#include "qemu/chardev_open.h" -+ -+static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) -+{ -+ VFIOIOMMUFDContainer *container = -+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ -+ return iommufd_backend_map_dma(container->be, -+ container->ioas_id, -+ iova, size, vaddr, readonly); -+} -+ -+static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) -+{ -+ VFIOIOMMUFDContainer *container = -+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ -+ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ -+ return iommufd_backend_unmap_dma(container->be, -+ container->ioas_id, iova, size); -+} -+ -+static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) -+{ -+ return vfio_kvm_device_add_fd(vbasedev->fd, errp); -+} -+ -+static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) -+{ -+ Error *err = NULL; -+ -+ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { -+ error_report_err(err); -+ } -+} -+ -+static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) -+{ -+ IOMMUFDBackend *iommufd = vbasedev->iommufd; -+ struct vfio_device_bind_iommufd bind = { -+ .argsz = sizeof(bind), -+ .flags = 0, -+ }; -+ int ret; -+ -+ ret = iommufd_backend_connect(iommufd, errp); -+ if (ret) { -+ return ret; -+ } -+ -+ /* -+ * Add device to kvm-vfio to be prepared for the tracking -+ * in KVM. Especially for some emulated devices, it requires -+ * to have kvm information in the device open. 
-+ */ -+ ret = iommufd_cdev_kvm_device_add(vbasedev, errp); -+ if (ret) { -+ goto err_kvm_device_add; -+ } -+ -+ /* Bind device to iommufd */ -+ bind.iommufd = iommufd->fd; -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); -+ if (ret) { -+ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", -+ vbasedev->fd, bind.iommufd); -+ goto err_bind; -+ } -+ -+ vbasedev->devid = bind.out_devid; -+ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, -+ vbasedev->fd, vbasedev->devid); -+ return ret; -+err_bind: -+ iommufd_cdev_kvm_device_del(vbasedev); -+err_kvm_device_add: -+ iommufd_backend_disconnect(iommufd); -+ return ret; -+} -+ -+static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) -+{ -+ /* Unbind is automatically conducted when device fd is closed */ -+ iommufd_cdev_kvm_device_del(vbasedev); -+ iommufd_backend_disconnect(vbasedev->iommufd); -+} -+ -+static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) -+{ -+ long int ret = -ENOTTY; -+ char *path, *vfio_dev_path = NULL, *vfio_path = NULL; -+ DIR *dir = NULL; -+ struct dirent *dent; -+ gchar *contents; -+ struct stat st; -+ gsize length; -+ int major, minor; -+ dev_t vfio_devt; -+ -+ path = g_strdup_printf("%s/vfio-dev", sysfs_path); -+ if (stat(path, &st) < 0) { -+ error_setg_errno(errp, errno, "no such host device"); -+ goto out_free_path; -+ } -+ -+ dir = opendir(path); -+ if (!dir) { -+ error_setg_errno(errp, errno, "couldn't open directory %s", path); -+ goto out_free_path; -+ } -+ -+ while ((dent = readdir(dir))) { -+ if (!strncmp(dent->d_name, "vfio", 4)) { -+ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); -+ break; -+ } -+ } -+ -+ if (!vfio_dev_path) { -+ error_setg(errp, "failed to find vfio-dev/vfioX/dev"); -+ goto out_close_dir; -+ } -+ -+ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { -+ error_setg(errp, "failed to load \"%s\"", vfio_dev_path); -+ goto out_free_dev_path; -+ } -+ -+ if 
(sscanf(contents, "%d:%d", &major, &minor) != 2) { -+ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); -+ goto out_free_dev_path; -+ } -+ g_free(contents); -+ vfio_devt = makedev(major, minor); -+ -+ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); -+ ret = open_cdev(vfio_path, vfio_devt); -+ if (ret < 0) { -+ error_setg(errp, "Failed to open %s", vfio_path); -+ } -+ -+ trace_iommufd_cdev_getfd(vfio_path, ret); -+ g_free(vfio_path); -+ -+out_free_dev_path: -+ g_free(vfio_dev_path); -+out_close_dir: -+ closedir(dir); -+out_free_path: -+ if (*errp) { -+ error_prepend(errp, VFIO_MSG_PREFIX, path); -+ } -+ g_free(path); -+ -+ return ret; -+} -+ -+static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, -+ Error **errp) -+{ -+ int ret, iommufd = vbasedev->iommufd->fd; -+ struct vfio_device_attach_iommufd_pt attach_data = { -+ .argsz = sizeof(attach_data), -+ .flags = 0, -+ .pt_id = id, -+ }; -+ -+ /* Attach device to an IOAS or hwpt within iommufd */ -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); -+ if (ret) { -+ error_setg_errno(errp, errno, -+ "[iommufd=%d] error attach %s (%d) to id=%d", -+ iommufd, vbasedev->name, vbasedev->fd, id); -+ } else { -+ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, -+ vbasedev->fd, id); -+ } -+ return ret; -+} -+ -+static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) -+{ -+ int ret, iommufd = vbasedev->iommufd->fd; -+ struct vfio_device_detach_iommufd_pt detach_data = { -+ .argsz = sizeof(detach_data), -+ .flags = 0, -+ }; -+ -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); -+ if (ret) { -+ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); -+ } else { -+ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); -+ } -+ return ret; -+} -+ -+static int iommufd_cdev_attach_container(VFIODevice *vbasedev, -+ VFIOIOMMUFDContainer *container, -+ Error **errp) -+{ -+ return 
iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); -+} -+ -+static void iommufd_cdev_detach_container(VFIODevice *vbasedev, -+ VFIOIOMMUFDContainer *container) -+{ -+ Error *err = NULL; -+ -+ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { -+ error_report_err(err); -+ } -+} -+ -+static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) -+{ -+ VFIOContainerBase *bcontainer = &container->bcontainer; -+ -+ if (!QLIST_EMPTY(&bcontainer->device_list)) { -+ return; -+ } -+ memory_listener_unregister(&bcontainer->listener); -+ vfio_container_destroy(bcontainer); -+ iommufd_backend_free_id(container->be, container->ioas_id); -+ g_free(container); -+} -+ -+static int iommufd_cdev_ram_block_discard_disable(bool state) -+{ -+ /* -+ * We support coordinated discarding of RAM via the RamDiscardManager. -+ */ -+ return ram_block_uncoordinated_discard_disable(state); -+} -+ -+static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) -+{ -+ VFIOContainerBase *bcontainer; -+ VFIOIOMMUFDContainer *container; -+ VFIOAddressSpace *space; -+ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; -+ int ret, devfd; -+ uint32_t ioas_id; -+ Error *err = NULL; -+ -+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -+ if (devfd < 0) { -+ return devfd; -+ } -+ vbasedev->fd = devfd; -+ -+ ret = iommufd_cdev_connect_and_bind(vbasedev, errp); -+ if (ret) { -+ goto err_connect_bind; -+ } -+ -+ space = vfio_get_address_space(as); -+ -+ /* try to attach to an existing container in this space */ -+ QLIST_FOREACH(bcontainer, &space->containers, next) { -+ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ if (bcontainer->ops != &vfio_iommufd_ops || -+ vbasedev->iommufd != container->be) { -+ continue; -+ } -+ if (iommufd_cdev_attach_container(vbasedev, container, &err)) { -+ const char *msg = error_get_pretty(err); -+ -+ 
trace_iommufd_cdev_fail_attach_existing_container(msg); -+ error_free(err); -+ err = NULL; -+ } else { -+ ret = iommufd_cdev_ram_block_discard_disable(true); -+ if (ret) { -+ error_setg(errp, -+ "Cannot set discarding of RAM broken (%d)", ret); -+ goto err_discard_disable; -+ } -+ goto found_container; -+ } -+ } -+ -+ /* Need to allocate a new dedicated container */ -+ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); -+ if (ret < 0) { -+ goto err_alloc_ioas; -+ } -+ -+ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); -+ -+ container = g_malloc0(sizeof(*container)); -+ container->be = vbasedev->iommufd; -+ container->ioas_id = ioas_id; -+ -+ bcontainer = &container->bcontainer; -+ vfio_container_init(bcontainer, space, &vfio_iommufd_ops); -+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); -+ -+ ret = iommufd_cdev_attach_container(vbasedev, container, errp); -+ if (ret) { -+ goto err_attach_container; -+ } -+ -+ ret = iommufd_cdev_ram_block_discard_disable(true); -+ if (ret) { -+ goto err_discard_disable; -+ } -+ -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ -+ bcontainer->listener = vfio_memory_listener; -+ memory_listener_register(&bcontainer->listener, bcontainer->space->as); -+ -+ if (bcontainer->error) { -+ ret = -1; -+ error_propagate_prepend(errp, bcontainer->error, -+ "memory listener initialization failed: "); -+ goto err_listener_register; -+ } -+ -+ bcontainer->initialized = true; -+ -+found_container: -+ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); -+ if (ret) { -+ error_setg_errno(errp, errno, "error getting device info"); -+ goto err_listener_register; -+ } -+ -+ /* -+ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level -+ * for discarding incompatibility check as well? 
-+ */ -+ if (vbasedev->ram_block_discard_allowed) { -+ iommufd_cdev_ram_block_discard_disable(false); -+ } -+ -+ vbasedev->group = 0; -+ vbasedev->num_irqs = dev_info.num_irqs; -+ vbasedev->num_regions = dev_info.num_regions; -+ vbasedev->flags = dev_info.flags; -+ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); -+ vbasedev->bcontainer = bcontainer; -+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); -+ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); -+ -+ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, -+ vbasedev->num_regions, vbasedev->flags); -+ return 0; -+ -+err_listener_register: -+ iommufd_cdev_ram_block_discard_disable(false); -+err_discard_disable: -+ iommufd_cdev_detach_container(vbasedev, container); -+err_attach_container: -+ iommufd_cdev_container_destroy(container); -+err_alloc_ioas: -+ vfio_put_address_space(space); -+ iommufd_cdev_unbind_and_disconnect(vbasedev); -+err_connect_bind: -+ close(vbasedev->fd); -+ return ret; -+} -+ -+static void iommufd_cdev_detach(VFIODevice *vbasedev) -+{ -+ VFIOContainerBase *bcontainer = vbasedev->bcontainer; -+ VFIOAddressSpace *space = bcontainer->space; -+ VFIOIOMMUFDContainer *container = container_of(bcontainer, -+ VFIOIOMMUFDContainer, -+ bcontainer); -+ QLIST_REMOVE(vbasedev, global_next); -+ QLIST_REMOVE(vbasedev, container_next); -+ vbasedev->bcontainer = NULL; -+ -+ if (!vbasedev->ram_block_discard_allowed) { -+ iommufd_cdev_ram_block_discard_disable(false); -+ } -+ -+ iommufd_cdev_detach_container(vbasedev, container); -+ iommufd_cdev_container_destroy(container); -+ vfio_put_address_space(space); -+ -+ iommufd_cdev_unbind_and_disconnect(vbasedev); -+ close(vbasedev->fd); -+} -+ -+const VFIOIOMMUOps vfio_iommufd_ops = { -+ .dma_map = iommufd_cdev_map, -+ .dma_unmap = iommufd_cdev_unmap, -+ .attach_device = iommufd_cdev_attach, -+ .detach_device = iommufd_cdev_detach, -+}; -diff --git a/hw/vfio/meson.build 
b/hw/vfio/meson.build -index eb6ce6229d..e5d98b6adc 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -7,6 +7,9 @@ vfio_ss.add(files( - 'spapr.c', - 'migration.c', - )) -+vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( -+ 'iommufd.c', -+)) - vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( - 'display.c', - 'pci-quirks.c', -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 08a1f9dfa4..3340c93af0 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop - vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" - vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -+ -+#iommufd.c -+ -+iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d" -+iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)" -+iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d" -+iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s" -+iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" -+iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" -+iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" -diff 
--git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 24ecc0e7ee..3dac5c167e 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow { - QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; - } VFIOHostDMAWindow; - -+typedef struct IOMMUFDBackend IOMMUFDBackend; -+ -+typedef struct VFIOIOMMUFDContainer { -+ VFIOContainerBase bcontainer; -+ IOMMUFDBackend *be; -+ uint32_t ioas_id; -+} VFIOIOMMUFDContainer; -+ - typedef struct VFIODeviceOps VFIODeviceOps; - - typedef struct VFIODevice { -@@ -116,6 +124,8 @@ typedef struct VFIODevice { - OnOffAuto pre_copy_dirty_page_tracking; - bool dirty_pages_supported; - bool dirty_tracking; -+ int devid; -+ IOMMUFDBackend *iommufd; - } VFIODevice; - - struct VFIODeviceOps { -@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; - extern const VFIOIOMMUOps vfio_legacy_ops; -+extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch deleted file mode 100644 index 866a437..0000000 --- a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch +++ /dev/null @@ -1,155 +0,0 @@ -From f98defd6fe081bc44f5bd823d187d7d3b12832ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:23 +0100 -Subject: [PATCH 056/101] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: 
[55/67] 789ecf74ace326b0df5d494fd558d7d0b6294a85 (eauger1/centos-qemu-kvm) - -As previously done for the sPAPR and legacy IOMMU backends, convert -the VFIOIOMMUOps struct to a QOM interface. The set of of operations -for this backend can be referenced with a literal typename instead of -a C struct. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit ce5f6d49f5845c3b9955cc377a5223c3f8d7ba1e) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 2 +- - hw/vfio/iommufd.c | 35 ++++++++++++++++++++------- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 2 +- - 4 files changed, 28 insertions(+), 12 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 2329d0efc8..89ff1c7aed 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1508,7 +1508,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { -- ops = &vfio_iommufd_ops; -+ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } - #endif - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 87a561c545..d4c586e842 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - int ret, devfd; - uint32_t ioas_id; - Error *err = NULL; -+ const VFIOIOMMUClass *iommufd_vioc = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - - if (vbasedev->fd < 0) { - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - /* try to attach to an existing container in this space */ - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -- if (bcontainer->ops != &vfio_iommufd_ops || -+ if (bcontainer->ops != iommufd_vioc || - vbasedev->iommufd != container->be) { - continue; 
- } -@@ -374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - container->ioas_id = ioas_id; - - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, space, &vfio_iommufd_ops); -+ vfio_container_init(bcontainer, space, iommufd_vioc); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); - - ret = iommufd_cdev_attach_container(vbasedev, container, errp); -@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) - static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) - { - VFIODevice *vbasedev_iter; -+ const VFIOIOMMUClass *iommufd_vioc = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - - QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { -- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { -+ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { - continue; - } - if (devid == vbasedev_iter->devid) { -@@ -621,10 +625,23 @@ out_single: - return ret; - } - --const VFIOIOMMUOps vfio_iommufd_ops = { -- .dma_map = iommufd_cdev_map, -- .dma_unmap = iommufd_cdev_unmap, -- .attach_device = iommufd_cdev_attach, -- .detach_device = iommufd_cdev_detach, -- .pci_hot_reset = iommufd_cdev_pci_hot_reset, -+static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->dma_map = iommufd_cdev_map; -+ vioc->dma_unmap = iommufd_cdev_unmap; -+ vioc->attach_device = iommufd_cdev_attach; -+ vioc->detach_device = iommufd_cdev_detach; -+ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; - }; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_IOMMUFD, -+ .parent = TYPE_VFIO_IOMMU, -+ .class_init = vfio_iommu_iommufd_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 14c497b6b0..9b7ef7d02b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -210,7 +210,6 @@ 
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; --extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 9e21d7811f..b2813b0c11 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -17,7 +17,6 @@ - - typedef struct VFIODevice VFIODevice; - typedef struct VFIOIOMMUClass VFIOIOMMUClass; --#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ - - typedef struct { - unsigned long *bitmap; -@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - #define TYPE_VFIO_IOMMU "vfio-iommu" - #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" - #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" -+#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch deleted file mode 100644 index f77032b..0000000 --- a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 5a49c5bb690d55fc88b6fb12f059ae932de0a716 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:04 +0800 -Subject: [PATCH 025/101] vfio/iommufd: Relax assert check for iommufd backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [24/67] 2c9e41e9ca0b67ebf807d1643a98866a0cb75768 
(eauger1/centos-qemu-kvm) - -Currently iommufd doesn't support dirty page sync yet, -but it will not block us doing live migration if VFIO -migration is force enabled. - -So in this case we allow set_dirty_page_tracking to be NULL. -Note we don't need same change for query_dirty_bitmap because -when dirty page sync isn't supported, query_dirty_bitmap will -never be called. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 36e84d0c17102fa1c887d8c650a13ec08fca0ec0) -Signed-off-by: Eric Auger ---- - hw/vfio/container-base.c | 4 ++++ - hw/vfio/container.c | 4 ---- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 71f7274973..eee2dcfe76 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start) - { -+ if (!bcontainer->dirty_pages_supported) { -+ return 0; -+ } -+ - g_assert(bcontainer->ops->set_dirty_page_tracking); - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 6bacf38222..ed2d721b2b 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -216,10 +216,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - .argsz = sizeof(dirty), - }; - -- if (!bcontainer->dirty_pages_supported) { -- return 0; -- } -- - if (start) { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; - } else { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch deleted file mode 100644 index 97d30c9..0000000 --- 
a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5549bf1b2e07213c23e280a43ab2ab67d5b7304a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:25 +0100 -Subject: [PATCH 058/101] vfio/iommufd: Remove CONFIG_IOMMUFD usage -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [57/67] 3a6a45d379241d9412e0b8bcfeb9be0b4add59a5 (eauger1/centos-qemu-kvm) - -Availability of the IOMMUFD backend can now be fully determined at -runtime and the ifdef check was a build time protection (for PPC not -supporting it mostly). - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit c1139fa4feba8c320e4bd0a4e34af55caa5ffbb9) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 89ff1c7aed..0d4d8b8416 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -19,7 +19,6 @@ - */ - - #include "qemu/osdep.h" --#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #ifdef CONFIG_KVM - #include -@@ -1506,11 +1505,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - const VFIOIOMMUClass *ops = - VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - --#ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { - ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } --#endif - - assert(ops); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch deleted file mode 100644 index 7401d52..0000000 --- a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch +++ /dev/null @@ 
-1,56 +0,0 @@ -From 6b36dc2a305af856af03aad2e315eea96a349153 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Thu, 21 Dec 2023 09:09:57 +0100 -Subject: [PATCH 061/101] vfio/iommufd: Remove the use of stat() to check file - existence -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [60/67] 485770e45c1a6399780939bfb8b01b615d9213c6 (eauger1/centos-qemu-kvm) - -Using stat() before opening a file or a directory can lead to a -time-of-check to time-of-use (TOCTOU) filesystem race, which is -reported by coverity as a Security best practices violations. The -sequence could be replaced by open and fdopendir but it doesn't add -much in this case. Simply use opendir to avoid the race. - -Fixes: CID 1531551 -Signed-off-by: Cédric Le Goater -Reviewed-by: Zhenzhong Duan -(cherry picked from commit 6ba254801f6bc7f3ef68a6414f1b107237c7eb26) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index d4c586e842..9bfddc1360 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -121,17 +121,11 @@ static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) - DIR *dir = NULL; - struct dirent *dent; - gchar *contents; -- struct stat st; - gsize length; - int major, minor; - dev_t vfio_devt; - - path = g_strdup_printf("%s/vfio-dev", sysfs_path); -- if (stat(path, &st) < 0) { -- error_setg_errno(errp, errno, "no such host device"); -- goto out_free_path; -- } -- - dir = opendir(path); - if (!dir) { - error_setg_errno(errp, errno, "couldn't open directory %s", path); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch 
deleted file mode 100644 index 6556a19..0000000 --- a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 0c0435e7210b99a6bf7b8f8205f7af8277b7525b Mon Sep 17 00:00:00 2001 -From: Avihai Horon -Date: Sun, 31 Dec 2023 12:48:18 +0200 -Subject: [PATCH 063/101] vfio/migration: Add helper function to set state or - reset device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [62/67] 1a63eea289561a05a6a8527c2a9da0289a7836d9 (eauger1/centos-qemu-kvm) - -There are several places where failure in setting the device state leads -to a device reset, which is done by setting ERROR as the recover state. - -Add a helper function that sets the device state and resets the device -in case of failure. This will make the code cleaner and remove duplicate -comments. - -Signed-off-by: Avihai Horon -Reviewed-by: Cédric Le Goater -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit c817e5a377a334241eed149e35760aca58bdeb34) -Signed-off-by: Eric Auger ---- - hw/vfio/migration.c | 41 +++++++++++++++++------------------------ - 1 file changed, 17 insertions(+), 24 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 28d422b39f..70e6b1a709 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -163,6 +163,19 @@ reset_device: - return ret; - } - -+/* -+ * Some device state transitions require resetting the device if they fail. -+ * This function sets the device in new_state and resets the device if that -+ * fails. Reset is done by using ERROR as the recover state. 
-+ */ -+static int -+vfio_migration_set_state_or_reset(VFIODevice *vbasedev, -+ enum vfio_device_mig_state new_state) -+{ -+ return vfio_migration_set_state(vbasedev, new_state, -+ VFIO_DEVICE_STATE_ERROR); -+} -+ - static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, - uint64_t data_size) - { -@@ -422,12 +435,7 @@ static void vfio_save_cleanup(void *opaque) - * after migration has completed, so it won't increase downtime. - */ - if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { -- /* -- * If setting the device in STOP state fails, the device should be -- * reset. To do so, use ERROR state as a recover state. -- */ -- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, -- VFIO_DEVICE_STATE_ERROR); -+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP); - } - - g_free(migration->data_buffer); -@@ -699,12 +707,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, - VFIO_DEVICE_STATE_PRE_COPY_P2P : - VFIO_DEVICE_STATE_RUNNING_P2P; - -- /* -- * If setting the device in new_state fails, the device should be reset. -- * To do so, use ERROR state as a recover state. -- */ -- ret = vfio_migration_set_state(vbasedev, new_state, -- VFIO_DEVICE_STATE_ERROR); -+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); - if (ret) { - /* - * Migration should be aborted in this case, but vm_state_notify() -@@ -736,12 +739,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) - VFIO_DEVICE_STATE_STOP; - } - -- /* -- * If setting the device in new_state fails, the device should be reset. -- * To do so, use ERROR state as a recover state. 
-- */ -- ret = vfio_migration_set_state(vbasedev, new_state, -- VFIO_DEVICE_STATE_ERROR); -+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); - if (ret) { - /* - * Migration should be aborted in this case, but vm_state_notify() -@@ -770,12 +768,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -- /* -- * If setting the device in RUNNING state fails, the device should -- * be reset. To do so, use ERROR state as a recover state. -- */ -- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, -- VFIO_DEVICE_STATE_ERROR); -+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING); - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch deleted file mode 100644 index f79de18..0000000 --- a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 7788fdc2375e01ead0c8a705c3b3d7467dd93d67 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 21 Nov 2023 16:44:09 +0800 -Subject: [PATCH 030/101] vfio/pci: Allow the selection of a given iommu - backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [29/67] 363c62607a11093ea0062489e11a708117d8ffb9 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. 
This depends on whether an iommufd object has -been linked with the vfio-pci device: - -If the user wants to use the legacy backend, it shall not -link the vfio-pci device with any iommufd object: - - -device vfio-pci,host=0000:02:00.0 - -This is called the legacy mode/backend. - -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-pci device options: - - -object iommufd,id=iommufd0 - -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit ee42b261b0a2e465ae003ddcaf1caf117c201f74) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 83b2561908..39e6a6678e 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - -@@ -42,6 +43,7 @@ - #include "qapi/error.h" - #include "migration/blocker.h" - #include "migration/qemu-file.h" -+#include "sysemu/iommufd.h" - - #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" - -@@ -3415,6 +3417,10 @@ static Property vfio_pci_dev_properties[] = { - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), - */ -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch deleted file mode 100644 index 837e490..0000000 --- a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch +++ /dev/null @@ -1,69 +0,0 @@ 
-From 43236995e8ad336d366b625fb8362046be53fc34 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 29 Jan 2024 09:46:34 +0100 -Subject: [PATCH] vfio/pci: Clear MSI-X IRQ index always -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 218: vfio/pci: Clear MSI-X IRQ index always -RH-Jira: RHEL-21293 -RH-Acked-by: Eric Auger -RH-Acked-by: Alex Williamson -RH-Commit: [1/1] b4b587b13c11e350d3e5fcc11ba66a006b25a763 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21293 - -commit d2b668fca5652760b435ce812a743bba03d2f316 -Author: Cédric Le Goater -Date: Thu Jan 25 14:27:36 2024 +0100 - - vfio/pci: Clear MSI-X IRQ index always - - When doing device assignment of a physical device, MSI-X can be - enabled with no vectors enabled and this sets the IRQ index to - VFIO_PCI_MSIX_IRQ_INDEX. However, when MSI-X is disabled, the IRQ - index is left untouched if no vectors are in use. Then, when INTx - is enabled, the IRQ index value is considered incompatible (set to - MSI-X) and VFIO_DEVICE_SET_IRQS fails. QEMU complains with : - - qemu-system-x86_64: vfio 0000:08:00.0: Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument - - To avoid that, unconditionaly clear the IRQ index when MSI-X is - disabled. 
- - Buglink: https://issues.redhat.com/browse/RHEL-21293 - Fixes: 5ebffa4e87e7 ("vfio/pci: use an invalid fd to enable MSI-X") - Cc: Jing Liu - Cc: Alex Williamson - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index adb7c09367..29bb8067eb 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -829,9 +829,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) - } - } - -- if (vdev->nr_vectors) { -- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); -- } -+ /* -+ * Always clear MSI-X IRQ index. A PF device could have enabled -+ * MSI-X with no vectors. See vfio_msix_enable(). -+ */ -+ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); - - vfio_msi_disable_common(vdev); - vfio_intx_enable(vdev, &err); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch deleted file mode 100644 index af6593c..0000000 --- a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch +++ /dev/null @@ -1,139 +0,0 @@ -From fe5ecedd452754eeb238b23eb0544ed3c5086157 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:06 +0800 -Subject: [PATCH 027/101] vfio/pci: Extract out a helper - vfio_pci_get_pci_hot_reset_info -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [26/67] 730b7f1496f4f21310fa13c79cb87f8d5e2ad2a8 (eauger1/centos-qemu-kvm) - -This helper will be used by both legacy and iommufd backends. - -No functional changes intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4d36ec23a75eb387492f4d68ff1b8eeee5d68142) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++---------------- - hw/vfio/pci.h | 3 +++ - 2 files changed, 40 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ec98080f28..b482e5479f 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2448,22 +2448,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) - return (strcmp(tmp, name) == 0); - } - --static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) -+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, -+ struct vfio_pci_hot_reset_info **info_p) - { -- VFIOGroup *group; - struct vfio_pci_hot_reset_info *info; -- struct vfio_pci_dependent_device *devices; -- struct vfio_pci_hot_reset *reset; -- int32_t *fds; -- int ret, i, count; -- bool multi = false; -+ int ret, count; - -- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -- -- if (!single) { -- vfio_pci_pre_reset(vdev); -- } -- vdev->vbasedev.needs_reset = false; -+ assert(info_p && !*info_p); - - info = g_malloc0(sizeof(*info)); - info->argsz = sizeof(*info); -@@ -2471,24 +2462,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); - if (ret && errno != ENOSPC) { - ret = -errno; -+ g_free(info); - if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %s, " - "no available reset mechanism.", vdev->vbasedev.name); - } -- goto out_single; -+ return ret; - } - - count = info->count; -- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices))); -- info->argsz = sizeof(*info) + (count * sizeof(*devices)); -- devices = &info->devices[0]; -+ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); -+ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); - if (ret) { - ret = -errno; -+ g_free(info); - error_report("vfio: hot reset info failed: %m"); -+ return ret; -+ } -+ -+ *info_p = info; -+ return 0; -+} -+ -+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) -+{ -+ VFIOGroup *group; -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int32_t *fds; -+ int ret, i, count; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { - goto out_single; - } -+ devices = &info->devices[0]; - - trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); - -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index eb74d9de2d..3568a6135d 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); - - extern const PropertyInfo qdev_prop_nv_gpudirect_clique; - -+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, -+ struct vfio_pci_hot_reset_info **info_p); -+ - int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); - - int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch deleted file mode 100644 index 2a2db5f..0000000 --- a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch +++ /dev/null @@ -1,466 +0,0 @@ -From acc3e5306e184567006bc45e7f36f2473e75d08a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:07 +0800 -Subject: [PATCH 028/101] vfio/pci: Introduce a vfio pci hot reset interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [27/67] 192088dbf2cf88663acd2416f69b7eeb175b2525 (eauger1/centos-qemu-kvm) - -Legacy vfio pci and iommufd cdev have different process to hot reset -vfio device, expand current code to abstract out pci_hot_reset callback -for legacy vfio, this same interface will also be used by iommufd -cdev vfio device. - -Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it -into container.c. 
- -vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so -they could be called in legacy and iommufd pci_hot_reset callback. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit c328e7e8ad1c969dbcbe90ee76afcd3cfec5e945) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 170 ++++++++++++++++++++++++++ - hw/vfio/pci.c | 168 +------------------------ - hw/vfio/pci.h | 3 + - include/hw/vfio/vfio-container-base.h | 3 + - 4 files changed, 182 insertions(+), 162 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index ed2d721b2b..1dbf9b9a17 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -33,6 +33,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "migration/migration.h" -+#include "pci.h" - - VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); -@@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) - vfio_put_group(group); - } - -+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ VFIOGroup *group; -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int32_t *fds; -+ int ret, i, count; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { -+ goto out_single; -+ } -+ devices = &info->devices[0]; -+ -+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -+ -+ /* Verify that we have all the groups required */ -+ for (i = 0; i < info->count; i++) { -+ PCIHostDeviceAddress host; -+ VFIOPCIDevice *tmp; -+ VFIODevice *vbasedev_iter; -+ -+ host.domain = devices[i].segment; -+ host.bus = devices[i].bus; -+ host.slot = PCI_SLOT(devices[i].devfn); -+ host.function = PCI_FUNC(devices[i].devfn); -+ -+ trace_vfio_pci_hot_reset_dep_devices(host.domain, -+ host.bus, host.slot, host.function, devices[i].group_id); -+ -+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -+ continue; -+ } -+ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ if (group->groupid == devices[i].group_id) { -+ break; -+ } -+ } -+ -+ if (!group) { -+ if (!vdev->has_pm_reset) { -+ error_report("vfio: Cannot reset device %s, " -+ "depends on group %d which is not owned.", -+ vdev->vbasedev.name, devices[i].group_id); -+ } -+ ret = -EPERM; -+ goto out; -+ } -+ -+ /* Prep dependent devices for reset and clear our marker. 
*/ -+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -+ if (!vbasedev_iter->dev->realized || -+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -+ continue; -+ } -+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -+ if (single) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ vfio_pci_pre_reset(tmp); -+ tmp->vbasedev.needs_reset = false; -+ multi = true; -+ break; -+ } -+ } -+ } -+ -+ if (!single && !multi) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ -+ /* Determine how many group fds need to be passed */ -+ count = 0; -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ for (i = 0; i < info->count; i++) { -+ if (group->groupid == devices[i].group_id) { -+ count++; -+ break; -+ } -+ } -+ } -+ -+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); -+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); -+ fds = &reset->group_fds[0]; -+ -+ /* Fill in group fds */ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ for (i = 0; i < info->count; i++) { -+ if (group->groupid == devices[i].group_id) { -+ fds[reset->count++] = group->fd; -+ break; -+ } -+ } -+ } -+ -+ /* Bus reset! */ -+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -+ g_free(reset); -+ if (ret) { -+ ret = -errno; -+ } -+ -+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -+ ret ? 
strerror(errno) : "Success"); -+ -+out: -+ /* Re-enable INTx on affected devices */ -+ for (i = 0; i < info->count; i++) { -+ PCIHostDeviceAddress host; -+ VFIOPCIDevice *tmp; -+ VFIODevice *vbasedev_iter; -+ -+ host.domain = devices[i].segment; -+ host.bus = devices[i].bus; -+ host.slot = PCI_SLOT(devices[i].devfn); -+ host.function = PCI_FUNC(devices[i].devfn); -+ -+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -+ continue; -+ } -+ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ if (group->groupid == devices[i].group_id) { -+ break; -+ } -+ } -+ -+ if (!group) { -+ break; -+ } -+ -+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -+ if (!vbasedev_iter->dev->realized || -+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -+ continue; -+ } -+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -+ vfio_pci_post_reset(tmp); -+ break; -+ } -+ } -+ } -+out_single: -+ if (!single) { -+ vfio_pci_post_reset(vdev); -+ } -+ g_free(info); -+ -+ return ret; -+} -+ - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -@@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = { - .detach_device = vfio_legacy_detach_device, - .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, - .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, -+ .pci_hot_reset = vfio_legacy_pci_hot_reset, - }; -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index b482e5479f..83b2561908 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2377,7 +2377,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) - return 0; - } - --static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) -+void vfio_pci_pre_reset(VFIOPCIDevice *vdev) - { - PCIDevice *pdev = &vdev->pdev; - uint16_t cmd; -@@ -2414,7 +2414,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) - vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); - } - --static void 
vfio_pci_post_reset(VFIOPCIDevice *vdev) -+void vfio_pci_post_reset(VFIOPCIDevice *vdev) - { - Error *err = NULL; - int nr; -@@ -2438,7 +2438,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) - vfio_quirk_reset(vdev); - } - --static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) -+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) - { - char tmp[13]; - -@@ -2488,166 +2488,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - - static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - { -- VFIOGroup *group; -- struct vfio_pci_hot_reset_info *info = NULL; -- struct vfio_pci_dependent_device *devices; -- struct vfio_pci_hot_reset *reset; -- int32_t *fds; -- int ret, i, count; -- bool multi = false; -- -- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); -- -- if (!single) { -- vfio_pci_pre_reset(vdev); -- } -- vdev->vbasedev.needs_reset = false; -- -- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -- -- if (ret) { -- goto out_single; -- } -- devices = &info->devices[0]; -- -- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -- -- /* Verify that we have all the groups required */ -- for (i = 0; i < info->count; i++) { -- PCIHostDeviceAddress host; -- VFIOPCIDevice *tmp; -- VFIODevice *vbasedev_iter; -- -- host.domain = devices[i].segment; -- host.bus = devices[i].bus; -- host.slot = PCI_SLOT(devices[i].devfn); -- host.function = PCI_FUNC(devices[i].devfn); -- -- trace_vfio_pci_hot_reset_dep_devices(host.domain, -- host.bus, host.slot, host.function, devices[i].group_id); -- -- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -- continue; -- } -- -- QLIST_FOREACH(group, &vfio_group_list, next) { -- if (group->groupid == devices[i].group_id) { -- break; -- } -- } -- -- if (!group) { -- if (!vdev->has_pm_reset) { -- error_report("vfio: Cannot reset device %s, " -- "depends on group %d which is not owned.", -- vdev->vbasedev.name, devices[i].group_id); -- 
} -- ret = -EPERM; -- goto out; -- } -- -- /* Prep dependent devices for reset and clear our marker. */ -- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -- if (!vbasedev_iter->dev->realized || -- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -- continue; -- } -- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -- if (single) { -- ret = -EINVAL; -- goto out_single; -- } -- vfio_pci_pre_reset(tmp); -- tmp->vbasedev.needs_reset = false; -- multi = true; -- break; -- } -- } -- } -- -- if (!single && !multi) { -- ret = -EINVAL; -- goto out_single; -- } -- -- /* Determine how many group fds need to be passed */ -- count = 0; -- QLIST_FOREACH(group, &vfio_group_list, next) { -- for (i = 0; i < info->count; i++) { -- if (group->groupid == devices[i].group_id) { -- count++; -- break; -- } -- } -- } -- -- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); -- reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); -- fds = &reset->group_fds[0]; -- -- /* Fill in group fds */ -- QLIST_FOREACH(group, &vfio_group_list, next) { -- for (i = 0; i < info->count; i++) { -- if (group->groupid == devices[i].group_id) { -- fds[reset->count++] = group->fd; -- break; -- } -- } -- } -- -- /* Bus reset! */ -- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -- g_free(reset); -- -- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -- ret ? 
strerror(errno) : "Success"); -- --out: -- /* Re-enable INTx on affected devices */ -- for (i = 0; i < info->count; i++) { -- PCIHostDeviceAddress host; -- VFIOPCIDevice *tmp; -- VFIODevice *vbasedev_iter; -- -- host.domain = devices[i].segment; -- host.bus = devices[i].bus; -- host.slot = PCI_SLOT(devices[i].devfn); -- host.function = PCI_FUNC(devices[i].devfn); -- -- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -- continue; -- } -- -- QLIST_FOREACH(group, &vfio_group_list, next) { -- if (group->groupid == devices[i].group_id) { -- break; -- } -- } -- -- if (!group) { -- break; -- } -- -- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -- if (!vbasedev_iter->dev->realized || -- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -- continue; -- } -- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -- vfio_pci_post_reset(tmp); -- break; -- } -- } -- } --out_single: -- if (!single) { -- vfio_pci_post_reset(vdev); -- } -- g_free(info); -+ VFIODevice *vbasedev = &vdev->vbasedev; -+ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; - -- return ret; -+ return ops->pci_hot_reset(vbasedev, single); - } - - /* -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 3568a6135d..b7de39c010 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); - - extern const PropertyInfo qdev_prop_nv_gpudirect_clique; - -+void vfio_pci_pre_reset(VFIOPCIDevice *vdev); -+void vfio_pci_post_reset(VFIOPCIDevice *vdev); -+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name); - int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - struct vfio_pci_hot_reset_info **info_p); - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 4b6f017c6f..45bb19c767 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -106,6 +106,9 @@ struct 
VFIOIOMMUOps { - int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); - int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -+ /* PCI specific */ -+ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); -+ - /* SPAPR specific */ - int (*add_window)(VFIOContainerBase *bcontainer, - MemoryRegionSection *section, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch deleted file mode 100644 index 4a973b5..0000000 --- a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:10 +0800 -Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm) - -Conflicts: contextual conflict in hw/vfio/pci.c due to -RHEL-only f73562144e492 vfio: cap number of devices that can be assigned - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Together with the earlier support of pre-opening /dev/iommu device, -now we have full support of passing a vfio device to unprivileged -qemu by management tool. This mode is no more considered for the -legacy backend. So let's remove the "TODO" comment. 
- -Add helper functions vfio_device_set_fd() and vfio_device_get_name() -to set fd and get device name, they will also be used by other vfio -devices. - -There is no easy way to check if a device is mdev with FD passing, -so fail the x-balloon-allowed check unconditionally in this case. - -There is also no easy way to get BDF as name with FD passing, so -we fake a name by VFIO_FD[fd]. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b) -Signed-off-by: Eric Auger ---- - hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++ - hw/vfio/iommufd.c | 12 ++++++---- - hw/vfio/pci.c | 28 +++++++++++++---------- - include/hw/vfio/vfio-common.h | 4 ++++ - 4 files changed, 71 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c -index 168847e7c5..3592c3d54e 100644 ---- a/hw/vfio/helpers.c -+++ b/hw/vfio/helpers.c -@@ -27,6 +27,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "qemu/error-report.h" -+#include "monitor/monitor.h" - - /* - * Common VFIO interrupt disable -@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) - - return ret; - } -+ -+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) -+{ -+ struct stat st; -+ -+ if (vbasedev->fd < 0) { -+ if (stat(vbasedev->sysfsdev, &st) < 0) { -+ error_setg_errno(errp, errno, "no such host device"); -+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); -+ return -errno; -+ } -+ /* User may specify a name, e.g: VFIO platform device */ -+ if (!vbasedev->name) { -+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -+ } -+ } else { -+ if (!vbasedev->iommufd) { -+ error_setg(errp, "Use FD passing only with iommufd backend"); -+ return -EINVAL; -+ } -+ /* -+ * Give a name with fd so any function printing out vbasedev->name -+ * will not break. 
-+ */ -+ if (!vbasedev->name) { -+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); -+ } -+ } -+ -+ return 0; -+} -+ -+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) -+{ -+ int fd = monitor_fd_param(monitor_cur(), str, errp); -+ -+ if (fd < 0) { -+ error_prepend(errp, "Could not parse remote object fd %s:", str); -+ return; -+ } -+ vbasedev->fd = fd; -+} -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 6e53e013ef..5accd26484 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - uint32_t ioas_id; - Error *err = NULL; - -- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -- if (devfd < 0) { -- return devfd; -+ if (vbasedev->fd < 0) { -+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -+ if (devfd < 0) { -+ return devfd; -+ } -+ vbasedev->fd = devfd; -+ } else { -+ devfd = vbasedev->fd; - } -- vbasedev->fd = devfd; - - ret = iommufd_cdev_connect_and_bind(vbasedev, errp); - if (ret) { -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 39e6a6678e..3412a63bb1 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - VFIOGroup *group; - char *tmp, *subsys; - Error *err = NULL; -- struct stat st; - int ret, i = 0; - bool is_mdev; - char uuid[UUID_STR_LEN]; -@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - return; - } - -- if (!vbasedev->sysfsdev) { -+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { - if (!(~vdev->host.domain || ~vdev->host.bus || - ~vdev->host.slot || ~vdev->host.function)) { - error_setg(errp, "No provided host device"); - error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " -+#ifdef CONFIG_IOMMUFD -+ "or -device vfio-pci,fd=DEVICE_FD " -+#endif - "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); - return; - } -@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error 
**errp) - vdev->host.slot, vdev->host.function); - } - -- if (stat(vbasedev->sysfsdev, &st) < 0) { -- error_setg_errno(errp, errno, "no such host device"); -- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); -+ if (vfio_device_get_name(vbasedev, errp) < 0) { - return; - } -- -- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); - vbasedev->ops = &vfio_pci_ops; - vbasedev->type = VFIO_DEVICE_TYPE_PCI; - vbasedev->dev = DEVICE(vdev); -@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj) - vdev->host.bus = ~0U; - vdev->host.slot = ~0U; - vdev->host.function = ~0U; -+ vdev->vbasedev.fd = -1; - - vdev->nv_gpudirect_clique = 0xFF; - -@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = { - qdev_prop_nv_gpudirect_clique, uint8_t), - DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, - OFF_AUTOPCIBAR_OFF), -- /* -- * TODO - support passed fds... is this necessary? -- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), -- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), -- */ - #ifdef CONFIG_IOMMUFD - DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, - TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_END_OF_LIST(), - }; - -+#ifdef CONFIG_IOMMUFD -+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); -+} -+#endif -+ - static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - - dc->reset = vfio_pci_reset; - device_class_set_props(dc, vfio_pci_dev_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); -+#endif - dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - pdc->realize 
= vfio_realize; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 3dac5c167e..697bf24a35 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - hwaddr size); - int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr); -+ -+/* Returns 0 on success, or a negative errno. */ -+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); -+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch deleted file mode 100644 index d426ede..0000000 --- a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 942bd7251d166f558e0e6acf7ba853e940e2fb52 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:21 +0800 -Subject: [PATCH 042/101] vfio/pci: Move VFIODevice initializations in - vfio_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [41/67] 67392d7a92a6ec2155697a355c88d295338a0785 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_realize, -move all of them in vfio_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit dd2fcb1716be9b89c726b3446f38446bb99d6b3a) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 3412a63bb1..3f5900cc46 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2995,9 +2995,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - if (vfio_device_get_name(vbasedev, errp) < 0) { - return; - } -- vbasedev->ops = &vfio_pci_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_PCI; -- vbasedev->dev = DEVICE(vdev); - - /* - * Mediated devices *might* operate compatibly with discarding of RAM, but -@@ -3346,6 +3343,7 @@ static void vfio_instance_init(Object *obj) - { - PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI(obj); -+ VFIODevice *vbasedev = &vdev->vbasedev; - - device_add_bootindex_property(obj, &vdev->bootindex, - "bootindex", NULL, -@@ -3354,7 +3352,11 @@ static void vfio_instance_init(Object *obj) - vdev->host.bus = ~0U; - vdev->host.slot = ~0U; - vdev->host.function = ~0U; -- vdev->vbasedev.fd = -1; -+ -+ vbasedev->type = VFIO_DEVICE_TYPE_PCI; -+ vbasedev->ops = &vfio_pci_ops; -+ vbasedev->dev = DEVICE(vdev); -+ vbasedev->fd = -1; - - vdev->nv_gpudirect_clique = 0xFF; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch deleted file mode 100644 index 06c2f0f..0000000 --- a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ede579d6d5fe5be9235d6a218efdb237192aee0e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:11 +0800 -Subject: [PATCH 032/101] vfio/platform: Allow the selection of a given iommu - backend -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [31/67] aba1dc16cada602edd7be1a28b0f57991131e6f7 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-platform device: - -If the user wants to use the legacy backend, it shall not -link the vfio-platform device with any iommufd object: - - -device vfio-platform,host=XXX - -This is called the legacy mode/backend. - -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-platform device options: - - -object iommufd,id=iommufd0 - -device vfio-platform,host=XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit a6c50e1c3f8d0eb77edaea392e61508bb3c516f8) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 8e3d4ac458..98ae4bc655 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -15,11 +15,13 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include "qapi/error.h" - #include - #include - - #include "hw/vfio/vfio-platform.h" -+#include "sysemu/iommufd.h" - #include "migration/vmstate.h" - #include "qemu/error-report.h" - #include "qemu/lockable.h" -@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = { - DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, - mmap_timeout, 1100), - DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), -+#ifdef CONFIG_IOMMUFD -+ 
DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch deleted file mode 100644 index f931524..0000000 --- a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 22664f4115d9b297ef4276e48f8ba0bc195ec99e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:12 +0800 -Subject: [PATCH 033/101] vfio/platform: Make vfio cdev pre-openable by passing - a file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [32/67] 069867dce64b826e92dc2051405a4ded5261981f (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 3016e60f8f715d2058a48e4956be994482c5e218) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 32 ++++++++++++++++++++++++-------- - 1 file changed, 24 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 98ae4bc655..a97d9c6234 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = { - */ - static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) - { -- struct stat st; - int ret; - -- /* @sysfsdev takes precedence over @host */ -- if (vbasedev->sysfsdev) { -+ /* @fd takes precedence over @sysfsdev which takes precedence over @host */ -+ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -- } else { -+ } else if (vbasedev->fd < 0) { - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - error_setg(errp, "wrong host device name"); - return -EINVAL; -@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) - vbasedev->name); - } - -- if (stat(vbasedev->sysfsdev, &st) < 0) { -- error_setg_errno(errp, errno, -- "failed to get the sysfs host device file status"); -- return -errno; -+ ret = vfio_device_get_name(vbasedev, errp); -+ if (ret) { -+ return ret; - } - - ret = vfio_attach_device(vbasedev->name, vbasedev, -@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = { - DEFINE_PROP_END_OF_LIST(), - }; - -+static void vfio_platform_instance_init(Object *obj) -+{ -+ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); -+ -+ vdev->vbasedev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); -+} -+#endif -+ - static void 
vfio_platform_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) - - dc->realize = vfio_platform_realize; - device_class_set_props(dc, vfio_platform_dev_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); -+#endif - dc->vmsd = &vfio_platform_vmstate; - dc->desc = "VFIO-based platform device assignment"; - sbc->connect_irq_notifier = vfio_start_irqfd_injection; -@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = { - .name = TYPE_VFIO_PLATFORM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(VFIOPlatformDevice), -+ .instance_init = vfio_platform_instance_init, - .class_init = vfio_platform_class_init, - .class_size = sizeof(VFIOPlatformDeviceClass), - }; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch deleted file mode 100644 index 56283a6..0000000 --- a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 2417020283532030f424fe07dfeb7477e6489640 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:22 +0800 -Subject: [PATCH 043/101] vfio/platform: Move VFIODevice initializations in - vfio_platform_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [42/67] 53a459b6246d7d7bdc7a62ac92f02f1e775a54a6 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_platform_realize, -move all of them in vfio_platform_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit a0cf44c8d618578843a65ea7f6d3db8ce52185bc) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index a97d9c6234..506eb8193f 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) - VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; - -- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -- vbasedev->dev = dev; -- vbasedev->ops = &vfio_platform_ops; -- - qemu_mutex_init(&vdev->intp_mutex); - - trace_vfio_platform_realize(vbasedev->sysfsdev ? -@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = { - static void vfio_platform_instance_init(Object *obj) - { - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); -+ VFIODevice *vbasedev = &vdev->vbasedev; - -- vdev->vbasedev.fd = -1; -+ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -+ vbasedev->ops = &vfio_platform_ops; -+ vbasedev->dev = DEVICE(vdev); -+ vbasedev->fd = -1; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch deleted file mode 100644 index fb7e707..0000000 --- a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch +++ /dev/null @@ -1,129 +0,0 @@ -From e75ec2aca351daabe597ca6322c1589885f30d7a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:16 +0100 -Subject: [PATCH 049/101] vfio/spapr: Extend VFIOIOMMUOps with a release - handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend 
backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [48/67] 1c4d22a6f69324805d050767fcf178d8566f2030 (eauger1/centos-qemu-kvm) - -This allows to abstract a bit more the sPAPR IOMMU support in the -legacy IOMMU backend. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 001a013ea3f125d2ec0e709b5765754149d8d968) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 10 +++----- - hw/vfio/spapr.c | 35 +++++++++++++++------------ - include/hw/vfio/vfio-container-base.h | 1 + - 3 files changed, 24 insertions(+), 22 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index b22feb8ded..1e77a2929e 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -632,9 +632,8 @@ listener_release_exit: - QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); - memory_listener_unregister(&bcontainer->listener); -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { -- vfio_spapr_container_deinit(container); -+ if (bcontainer->ops->release) { -+ bcontainer->ops->release(bcontainer); - } - - enable_discards_exit: -@@ -667,9 +666,8 @@ static void vfio_disconnect_container(VFIOGroup *group) - */ - if (QLIST_EMPTY(&container->group_list)) { - memory_listener_unregister(&bcontainer->listener); -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { -- vfio_spapr_container_deinit(container); -+ if (bcontainer->ops->release) { -+ bcontainer->ops->release(bcontainer); - } - } - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 5c6426e697..44617dfc6b 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - } - } - -+static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) -+{ -+ VFIOContainer *container = 
container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); -+ VFIOHostDMAWindow *hostwin, *next; -+ -+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -+ memory_listener_unregister(&scontainer->prereg_listener); -+ } -+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, -+ next) { -+ QLIST_REMOVE(hostwin, hostwin_next); -+ g_free(hostwin); -+ } -+} -+ - static VFIOIOMMUOps vfio_iommu_spapr_ops; - - static void setup_spapr_ops(VFIOContainerBase *bcontainer) -@@ -447,6 +465,7 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) - vfio_iommu_spapr_ops = *bcontainer->ops; - vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; - vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; -+ vfio_iommu_spapr_ops.release = vfio_spapr_container_release; - bcontainer->ops = &vfio_iommu_spapr_ops; - } - -@@ -527,19 +546,3 @@ listener_unregister_exit: - } - return ret; - } -- --void vfio_spapr_container_deinit(VFIOContainer *container) --{ -- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -- container); -- VFIOHostDMAWindow *hostwin, *next; -- -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- memory_listener_unregister(&scontainer->prereg_listener); -- } -- QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, -- next) { -- QLIST_REMOVE(hostwin, hostwin_next); -- g_free(hostwin); -- } --} -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 2ae297ccda..5c9594b6c7 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -117,5 +117,6 @@ struct VFIOIOMMUOps { - Error **errp); - void (*del_window)(VFIOContainerBase *bcontainer, - MemoryRegionSection *section); -+ void (*release)(VFIOContainerBase *bcontainer); - }; - #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - 
diff --git a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch deleted file mode 100644 index f835acb..0000000 --- a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 645ed97633935712edcc2c56f252738b38f15e3a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:22 +0100 -Subject: [PATCH 055/101] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [54/67] 2ceac3c07d71790dc3852fbbbd4084a7affb9373 (eauger1/centos-qemu-kvm) - -Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler -and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The -sPAPR QOM interface inherits from the legacy QOM interface because -because both have the same basic needs. The sPAPR interface is then -extended with the handlers specific to the sPAPR IOMMU. - -This allows reuse and provides better abstraction of the backends. It -will be useful to avoid compiling the sPAPR IOMMU backend on targets -not supporting it. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit f221f641a2fe69c2ca3857759551470664b0bec8) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 18 +++++-------- - hw/vfio/spapr.c | 39 ++++++++++++++++----------- - include/hw/vfio/vfio-container-base.h | 1 + - 3 files changed, 31 insertions(+), 27 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c22bdd3216..688cf23bab 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -381,6 +381,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) - case VFIO_TYPE1_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); - break; -+ case VFIO_SPAPR_TCE_v2_IOMMU: -+ case VFIO_SPAPR_TCE_IOMMU: -+ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); -+ break; - default: - g_assert_not_reached(); - }; -@@ -623,19 +627,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - goto free_container_exit; - } - -- switch (container->iommu_type) { -- case VFIO_TYPE1v2_IOMMU: -- case VFIO_TYPE1_IOMMU: -- ret = vfio_legacy_setup(bcontainer, errp); -- break; -- case VFIO_SPAPR_TCE_v2_IOMMU: -- case VFIO_SPAPR_TCE_IOMMU: -- ret = vfio_spapr_container_init(container, errp); -- break; -- default: -- g_assert_not_reached(); -- } -+ assert(bcontainer->ops->setup); - -+ ret = bcontainer->ops->setup(bcontainer, errp); - if (ret) { - goto enable_discards_exit; - } -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 44617dfc6b..0d949bb728 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -458,20 +458,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) - } - } - --static VFIOIOMMUOps vfio_iommu_spapr_ops; -- --static void setup_spapr_ops(VFIOContainerBase *bcontainer) --{ -- vfio_iommu_spapr_ops = *bcontainer->ops; -- vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; -- vfio_iommu_spapr_ops.del_window = 
vfio_spapr_container_del_section_window; -- vfio_iommu_spapr_ops.release = vfio_spapr_container_release; -- bcontainer->ops = &vfio_iommu_spapr_ops; --} -- --int vfio_spapr_container_init(VFIOContainer *container, Error **errp) -+static int vfio_spapr_container_setup(VFIOContainerBase *bcontainer, -+ Error **errp) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, - container); - struct vfio_iommu_spapr_tce_info info; -@@ -536,8 +527,6 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - 0x1000); - } - -- setup_spapr_ops(bcontainer); -- - return 0; - - listener_unregister_exit: -@@ -546,3 +535,23 @@ listener_unregister_exit: - } - return ret; - } -+ -+static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->add_window = vfio_spapr_container_add_section_window; -+ vioc->del_window = vfio_spapr_container_del_section_window; -+ vioc->release = vfio_spapr_container_release; -+ vioc->setup = vfio_spapr_container_setup; -+}; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_SPAPR, -+ .parent = TYPE_VFIO_IOMMU_LEGACY, -+ .class_init = vfio_iommu_spapr_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index ce8b1fba88..9e21d7811f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - #define TYPE_VFIO_IOMMU "vfio-iommu" - #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" -+#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch deleted file mode 100644 index f1ca4a2..0000000 --- a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ff0c13c22878eed0f3879c0805bef5b9f9d83e04 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:42 +0800 -Subject: [PATCH 017/101] vfio/spapr: Introduce spapr backend and target - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [16/67] e35cda157a2a1afeded3305622c861abd07edb51 (eauger1/centos-qemu-kvm) - -Introduce an empty spapr backend which will hold spapr specific -content, currently only prereg_listener and hostwin_list. - -Also introduce two spapr specific callbacks add/del_window into -VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops -and assign it to bcontainer->ops. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 9b7d38bf5a2c1054bfe6de08806954cdc45d8d98) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 14 ++++++++++++++ - include/hw/vfio/vfio-container-base.h | 6 ++++++ - 2 files changed, 20 insertions(+) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 7a50975f25..e1a6b35563 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -24,6 +24,10 @@ - #include "qapi/error.h" - #include "trace.h" - -+typedef struct VFIOSpaprContainer { -+ VFIOContainer container; -+} VFIOSpaprContainer; -+ - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) - { - if (memory_region_is_iommu(section->mr)) { -@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container, - } - } - -+static VFIOIOMMUOps vfio_iommu_spapr_ops; -+ -+static void setup_spapr_ops(VFIOContainerBase *bcontainer) -+{ -+ vfio_iommu_spapr_ops = *bcontainer->ops; -+ bcontainer->ops = &vfio_iommu_spapr_ops; -+} -+ - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { - VFIOContainerBase *bcontainer = &container->bcontainer; -@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - 0x1000); - } - -+ setup_spapr_ops(bcontainer); -+ - return 0; - - listener_unregister_exit: -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 9658ffb526..f62a14ac73 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -101,5 +101,11 @@ struct VFIOIOMMUOps { - int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); - int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -+ /* SPAPR specific */ -+ int (*add_window)(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp); -+ void (*del_window)(VFIOContainerBase *bcontainer, 
-+ MemoryRegionSection *section); - }; - #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch deleted file mode 100644 index 93cb6b8..0000000 --- a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch +++ /dev/null @@ -1,188 +0,0 @@ -From 3e9e7b57b15ac328f5d663b4e04df546d49f5fa6 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:45 +0800 -Subject: [PATCH 020/101] vfio/spapr: Move hostwin_list into spapr container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [19/67] 87cfeaa32ad32a260a89b2bb1866d59e20c0fe30 (eauger1/centos-qemu-kvm) - -No functional changes intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dbb9d0c9691d145338686d3e0920da047f2ab3da) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 36 +++++++++++++++++++---------------- - include/hw/vfio/vfio-common.h | 1 - - 2 files changed, 20 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 68c3dd6c75..5c6426e697 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -27,6 +27,7 @@ - typedef struct VFIOSpaprContainer { - VFIOContainer container; - MemoryListener prereg_listener; -+ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - } VFIOSpaprContainer; - - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) -@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = { - .region_del = vfio_prereg_listener_region_del, - }; - --static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, -+static void 
vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) - { - VFIOHostDMAWindow *hostwin; - -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, -@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; -- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -+ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next); - } - --static int vfio_host_win_del(VFIOContainer *container, -+static int vfio_host_win_del(VFIOSpaprContainer *scontainer, - hwaddr min_iova, hwaddr max_iova) - { - VFIOHostDMAWindow *hostwin; - -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); -@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container, - return -1; - } - --static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, -+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container, - hwaddr iova, hwaddr end) - { - VFIOHostDMAWindow *hostwin; -@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; -@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - iova = section->offset_within_address_space; - end = iova + 
int128_get64(section->size) - 1; - -- if (!vfio_find_hostwin(container, iova, end)) { -+ if (!vfio_find_hostwin(scontainer, iova, end)) { - error_setg(errp, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, - iova, end); -@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - } - - /* For now intersections are not allowed, we may relax this later */ -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, -@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - return ret; - } - -- vfio_host_win_add(container, section->offset_within_address_space, -+ vfio_host_win_add(scontainer, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); - #ifdef CONFIG_KVM -@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; -@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - - vfio_spapr_remove_window(container, - section->offset_within_address_space); -- if (vfio_host_win_del(container, -+ if (vfio_host_win_del(scontainer, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { -@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; - -- QLIST_INIT(&container->hostwin_list); -+ 
QLIST_INIT(&scontainer->hostwin_list); - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called -@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } else { - /* The default table uses 4K pages */ - bcontainer->pgsizes = 0x1000; -- vfio_host_win_add(container, info.dma32_window_start, -+ vfio_host_win_add(scontainer, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); -@@ -525,15 +530,14 @@ listener_unregister_exit: - - void vfio_spapr_container_deinit(VFIOContainer *container) - { -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - VFIOHostDMAWindow *hostwin, *next; - - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- VFIOSpaprContainer *scontainer = container_of(container, -- VFIOSpaprContainer, -- container); - memory_listener_unregister(&scontainer->prereg_listener); - } -- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, -+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index ed6148c058..24ecc0e7ee 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -79,7 +79,6 @@ typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - unsigned iommu_type; -- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - } VFIOContainer; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch deleted file mode 100644 index 1db4b55..0000000 --- a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 17e6dad3e43e173147c0ca33f6f1f4f317a77d0b Mon Sep 
17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:44 +0800 -Subject: [PATCH 019/101] vfio/spapr: Move prereg_listener into spapr container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [18/67] dbea1b0b759e91b953271da92bba4ca6853bec82 (eauger1/centos-qemu-kvm) - -No functional changes intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6ad359ec29af7f21dcb206c8edb26905a4925f80) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 24 ++++++++++++++++-------- - include/hw/vfio/vfio-common.h | 1 - - 2 files changed, 16 insertions(+), 9 deletions(-) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 5be1911aad..68c3dd6c75 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -26,6 +26,7 @@ - - typedef struct VFIOSpaprContainer { - VFIOContainer container; -+ MemoryListener prereg_listener; - } VFIOSpaprContainer; - - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) -@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) - static void vfio_prereg_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, -- prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, -+ prereg_listener); -+ VFIOContainer *container = &scontainer->container; - VFIOContainerBase *bcontainer = &container->bcontainer; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; -@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - static void vfio_prereg_listener_region_del(MemoryListener *listener, - MemoryRegionSection 
*section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, -- prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, -+ prereg_listener); -+ VFIOContainer *container = &scontainer->container; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; - int ret; -@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { - VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; -@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - return -errno; - } - } else { -- container->prereg_listener = vfio_prereg_listener; -+ scontainer->prereg_listener = vfio_prereg_listener; - -- memory_listener_register(&container->prereg_listener, -+ memory_listener_register(&scontainer->prereg_listener, - &address_space_memory); - if (bcontainer->error) { - ret = -1; -@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - - listener_unregister_exit: - if (v2) { -- memory_listener_unregister(&container->prereg_listener); -+ memory_listener_unregister(&scontainer->prereg_listener); - } - return ret; - } -@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container) - VFIOHostDMAWindow *hostwin, *next; - - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- memory_listener_unregister(&container->prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(container, -+ VFIOSpaprContainer, -+ container); -+ memory_listener_unregister(&scontainer->prereg_listener); - } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { -diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h -index 055f679363..ed6148c058 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -78,7 +78,6 @@ struct VFIOGroup; - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ -- MemoryListener prereg_listener; - unsigned iommu_type; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch deleted file mode 100644 index 7762804..0000000 --- a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 5d485eb1442a81b51688124ce30024e96490acbf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:24 +0100 -Subject: [PATCH 057/101] vfio/spapr: Only compile sPAPR IOMMU support when - needed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [56/67] 4dc0cbde470f877a8aac2bf6fab6923f2f919285 (eauger1/centos-qemu-kvm) - -sPAPR IOMMU support is only needed for pseries machines. Compile out -support when CONFIG_PSERIES is not set. This saves ~7K of text. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 10164df6ed3d41cbf67105dcd954a663ef4cc3e9) -Signed-off-by: Eric Auger ---- - hw/vfio/meson.build | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build -index e5d98b6adc..bb98493b53 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -4,9 +4,9 @@ vfio_ss.add(files( - 'common.c', - 'container-base.c', - 'container.c', -- 'spapr.c', - 'migration.c', - )) -+vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) - vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( - 'iommufd.c', - )) --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch deleted file mode 100644 index 4d8db61..0000000 --- a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch +++ /dev/null @@ -1,184 +0,0 @@ -From 3b7f044f15b4a9daf4ad7eda58777aba6dbe3fc0 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:43 +0800 -Subject: [PATCH 018/101] vfio/spapr: switch to spapr IOMMU BE - add/del_section_window -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [17/67] a0d9f1f2d4d2592f3d9fc2ee5b2c38236a986e38 (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -(cherry picked from commit 233309e8e4c158af6c6b126d5ad021bae40a918a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 8 ++------ - hw/vfio/container-base.c | 21 +++++++++++++++++++++ - hw/vfio/spapr.c | 19 ++++++++++++++----- - include/hw/vfio/vfio-common.h | 5 ----- - include/hw/vfio/vfio-container-base.h | 5 +++++ - 5 files changed, 42 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 483ba82089..572ae7c934 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, - { - VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, - listener); -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - return; - } - -- if (vfio_container_add_section_window(container, section, &err)) { -+ if (vfio_container_add_section_window(bcontainer, section, &err)) { - goto fail; - } - -@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener, - { - VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, - listener); -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - - memory_region_unref(section->mr); - -- vfio_container_del_section_window(container, section); -+ vfio_container_del_section_window(bcontainer, section); - } - - typedef struct VFIODirtyRanges { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 0177f43741..71f7274973 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase 
*bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - -+int vfio_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp) -+{ -+ if (!bcontainer->ops->add_window) { -+ return 0; -+ } -+ -+ return bcontainer->ops->add_window(bcontainer, section, errp); -+} -+ -+void vfio_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) -+{ -+ if (!bcontainer->ops->del_window) { -+ return; -+ } -+ -+ return bcontainer->ops->del_window(bcontainer, section); -+} -+ - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start) - { -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index e1a6b35563..5be1911aad 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, - return 0; - } - --int vfio_container_add_section_window(VFIOContainer *container, -- MemoryRegionSection *section, -- Error **errp) -+static int -+vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; -@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container, - return 0; - } - --void vfio_container_del_section_window(VFIOContainer *container, -- MemoryRegionSection *section) -+static void -+vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } -@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops; - static void setup_spapr_ops(VFIOContainerBase *bcontainer) - { - vfio_iommu_spapr_ops = *bcontainer->ops; -+ 
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; -+ vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; - bcontainer->ops = &vfio_iommu_spapr_ops; - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b9e5a0e64b..055f679363 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -169,11 +169,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - - /* SPAPR specific */ --int vfio_container_add_section_window(VFIOContainer *container, -- MemoryRegionSection *section, -- Error **errp); --void vfio_container_del_section_window(VFIOContainer *container, -- MemoryRegionSection *section); - int vfio_spapr_container_init(VFIOContainer *container, Error **errp); - void vfio_spapr_container_deinit(VFIOContainer *container); - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index f62a14ac73..4b6f017c6f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, - int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); -+int vfio_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp); -+void vfio_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section); - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start); - int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch deleted file mode 100644 index ef770fd..0000000 --- a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch +++ /dev/null @@ 
-1,139 +0,0 @@ -From 2a758da4e1433564998def68447008908c96e113 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:57 +0100 -Subject: [PATCH 2/6] virtio: Re-enable notifications after drain - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] e3be798e6259a378fc03f4364ecaeb875b01f64c (hreitz/qemu-kvm-c-9-s) - -During drain, we do not care about virtqueue notifications, which is why -we remove the handlers on it. When removing those handlers, whether vq -notifications are enabled or not depends on whether we were in polling -mode or not; if not, they are enabled (by default); if so, they have -been disabled by the io_poll_start callback. - -Because we do not care about those notifications after removing the -handlers, this is fine. However, we have to explicitly ensure they are -enabled when re-attaching the handlers, so we will resume receiving -notifications. We do this in virtio_queue_aio_attach_host_notifier*(). -If such a function is called while we are in a polling section, -attaching the notifiers will then invoke the io_poll_start callback, -re-disabling notifications. - -Because we will always miss virtqueue updates in the drained section, we -also need to poll the virtqueue once after attaching the notifiers. 
- -Buglink: https://issues.redhat.com/browse/RHEL-3934 -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-3-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5bdbaebcce18fe6a627cafad2043ec08f3de5744) ---- - hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ - include/block/aio.h | 7 ++++++- - 2 files changed, 48 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3a160f86ed..356d690cc9 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) - - void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - { -+ /* -+ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. -+ * Re-enable them. (And if detach has not been used before, notifications -+ * being enabled is still the default state while a notifier is attached; -+ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave -+ * notifications enabled once the polling section is left.) -+ */ -+ if (!virtio_queue_get_notification(vq)) { -+ virtio_queue_set_notification(vq, 1); -+ } -+ - aio_set_event_notifier(ctx, &vq->host_notifier, - virtio_queue_host_notifier_read, - virtio_queue_host_notifier_aio_poll, -@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - aio_set_event_notifier_poll(ctx, &vq->host_notifier, - virtio_queue_host_notifier_aio_poll_begin, - virtio_queue_host_notifier_aio_poll_end); -+ -+ /* -+ * We will have ignored notifications about new requests from the guest -+ * while no notifiers were attached, so "kick" the virt queue to process -+ * those requests now. 
-+ */ -+ event_notifier_set(&vq->host_notifier); - } - - /* -@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - */ - void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) - { -+ /* See virtio_queue_aio_attach_host_notifier() */ -+ if (!virtio_queue_get_notification(vq)) { -+ virtio_queue_set_notification(vq, 1); -+ } -+ - aio_set_event_notifier(ctx, &vq->host_notifier, - virtio_queue_host_notifier_read, - NULL, NULL); -+ -+ /* -+ * See virtio_queue_aio_attach_host_notifier(). -+ * Note that this may be unnecessary for the type of virtqueues this -+ * function is used for. Still, it will not hurt to have a quick look into -+ * whether we can/should process any of the virtqueue elements. -+ */ -+ event_notifier_set(&vq->host_notifier); - } - - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) - { - aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); -+ -+ /* -+ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() -+ * will run after io_poll_begin(), so by removing the notifier, we do not -+ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a -+ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether -+ * notifications are enabled or disabled. It does not really matter anyway; -+ * we just removed the notifier, so we do not care about notifications until -+ * we potentially re-attach it. The attach_host_notifier functions will -+ * ensure that notifications are enabled again when they are needed. 
-+ */ - } - - void virtio_queue_host_notifier_read(EventNotifier *n) -diff --git a/include/block/aio.h b/include/block/aio.h -index af05512a7d..261c77fd9a 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx, - AioPollFn *io_poll, - EventNotifierHandler *io_poll_ready); - --/* Set polling begin/end callbacks for an event notifier that has already been -+/* -+ * Set polling begin/end callbacks for an event notifier that has already been - * registered with aio_set_event_notifier. Do nothing if the event notifier is - * not registered. -+ * -+ * Note that if the io_poll_end() callback (or the entire notifier) is removed -+ * during polling, it will not be called, so an io_poll_begin() is not -+ * necessarily always followed by an io_poll_end(). - */ - void aio_set_event_notifier_poll(AioContext *ctx, - EventNotifier *notifier, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch deleted file mode 100644 index 0565357..0000000 --- a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 376df80fbba5a9bb0ec43cad083cde9de59128d7 Mon Sep 17 00:00:00 2001 -From: Stefan Weil via -Date: Sun, 24 Dec 2023 12:43:14 +0100 -Subject: [PATCH 10/22] virtio-blk: Fix potential nullpointer read access in - virtio_blk_data_plane_destroy - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [6/17] 460005fc7719b2e1dd577dfe75d18537ab2b8d06 (stefanha/centos-stream-qemu-kvm) - -Fixes: CID 1532828 -Fixes: b6948ab01d ("virtio-blk: add iothread-vq-mapping parameter") -Signed-off-by: Stefan Weil -Signed-off-by: Michael Tokarev -(cherry picked from commit d819fc9516a4ec71e37a6c9edfcd285b7f98c2dc) 
-Signed-off-by: Stefan Hajnoczi ---- - hw/block/dataplane/virtio-blk.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 6debd4401e..97a302cf49 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -152,7 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - { - VirtIOBlock *vblk; -- VirtIOBlkConf *conf = s->conf; -+ VirtIOBlkConf *conf; - - if (!s) { - return; -@@ -160,6 +160,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - - vblk = VIRTIO_BLK(s->vdev); - assert(!vblk->dataplane_started); -+ conf = s->conf; - - if (conf->iothread_vq_mapping_list) { - IOThreadVirtQueueMappingList *node; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch deleted file mode 100644 index 1a3771e..0000000 --- a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 094941b2c3e66e078d93718933eb07e800a7dd60 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:58 +0100 -Subject: [PATCH 3/6] virtio-blk: Use ioeventfd_attach in start_ioeventfd - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 96d6760d1b7b12df695b6825b15a2a3b8a79a74c (hreitz/qemu-kvm-c-9-s) - -Commit d3f6f294aeadd5f88caf0155e4360808c95b3146 ("virtio-blk: always set -ioeventfd during startup") has made virtio_blk_start_ioeventfd() always -kick the virtqueue (set the ioeventfd), regardless of whether the BB is -drained. 
That is no longer necessary, because attaching the host -notifier will now set the ioeventfd, too; this happens either -immediately right here in virtio_blk_start_ioeventfd(), or later when -the drain ends, in virtio_blk_ioeventfd_attach(). - -With event_notifier_set() removed, the code becomes the same as the one -in virtio_blk_ioeventfd_attach(), so we can reuse that function. - -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-4-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 52bff01f64eec017ffb0d5903a0ee1d67ca7a548) ---- - hw/block/virtio-blk.c | 21 ++++++++++----------- - 1 file changed, 10 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 0b9100b746..7fdeaf2d12 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -37,6 +37,8 @@ - #include "hw/virtio/virtio-blk-common.h" - #include "qemu/coroutine.h" - -+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); -+ - static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, - VirtIOBlockReq *req) - { -@@ -1808,17 +1810,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - s->ioeventfd_started = true; - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - -- /* Get this show started by hooking up our callbacks */ -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- -- if (!blk_in_drain(s->conf.conf.blk)) { -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -+ /* -+ * Get this show started by hooking up our callbacks. If drained now, -+ * virtio_blk_drained_end() will do this later. -+ * Attaching the notifier also kicks the virtqueues, processing any requests -+ * they may already have. 
-+ */ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ virtio_blk_ioeventfd_attach(s); - } - return 0; - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch deleted file mode 100644 index 65a96a0..0000000 --- a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch +++ /dev/null @@ -1,464 +0,0 @@ -From 733fc13f65286c849ad6618be89df450f8bc5f7e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:55 -0500 -Subject: [PATCH 09/22] virtio-blk: add iothread-vq-mapping parameter - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [5/17] c371fe62376c4eb54da88272a5966cec28404224 (stefanha/centos-stream-qemu-kvm) - -Add the iothread-vq-mapping parameter to assign virtqueues to IOThreads. -Store the vq:AioContext mapping in the new struct -VirtIOBlockDataPlane->vq_aio_context[] field and refactor the code to -use the per-vq AioContext instead of the BlockDriverState's AioContext. - -Reimplement --device virtio-blk-pci,iothread= and non-IOThread mode by -assigning all virtqueues to the IOThread and main loop's AioContext in -vq_aio_context[], respectively. - -The comment in struct VirtIOBlockDataPlane about EventNotifiers is -stale. Remove it. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b6948ab01df068bef591868c22d1f873d2d05cde) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/dataplane/virtio-blk.c | 155 ++++++++++++++++++++++++-------- - hw/block/dataplane/virtio-blk.h | 3 + - hw/block/virtio-blk.c | 92 ++++++++++++++++--- - include/hw/virtio/virtio-blk.h | 2 + - 4 files changed, 202 insertions(+), 50 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 7bbbd981ad..6debd4401e 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -32,13 +32,11 @@ struct VirtIOBlockDataPlane { - VirtIOBlkConf *conf; - VirtIODevice *vdev; - -- /* Note that these EventNotifiers are assigned by value. This is -- * fine as long as you do not call event_notifier_cleanup on them -- * (because you don't own the file descriptor or handle; you just -- * use it). -+ /* -+ * The AioContext for each virtqueue. The BlockDriverState will use the -+ * first element as its AioContext. 
- */ -- IOThread *iothread; -- AioContext *ctx; -+ AioContext **vq_aio_context; - }; - - /* Raise an interrupt to signal guest, if necessary */ -@@ -47,6 +45,45 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) - virtio_notify_irqfd(s->vdev, vq); - } - -+/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -+static void -+apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -+ AioContext **vq_aio_context, uint16_t num_queues) -+{ -+ IOThreadVirtQueueMappingList *node; -+ size_t num_iothreads = 0; -+ size_t cur_iothread = 0; -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ num_iothreads++; -+ } -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ AioContext *ctx = iothread_get_aio_context(iothread); -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(iothread)); -+ -+ if (node->value->vqs) { -+ uint16List *vq; -+ -+ /* Explicit vq:IOThread assignment */ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ vq_aio_context[vq->value] = ctx; -+ } -+ } else { -+ /* Round-robin vq:IOThread assignment */ -+ for (unsigned i = cur_iothread; i < num_queues; -+ i += num_iothreads) { -+ vq_aio_context[i] = ctx; -+ } -+ } -+ -+ cur_iothread++; -+ } -+} -+ - /* Context: QEMU global mutex held */ - bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - VirtIOBlockDataPlane **dataplane, -@@ -58,7 +95,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - - *dataplane = NULL; - -- if (conf->iothread) { -+ if (conf->iothread || conf->iothread_vq_mapping_list) { - if (!k->set_guest_notifiers || !k->ioeventfd_assign) { - error_setg(errp, - "device is incompatible with iothread " -@@ -86,13 +123,24 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - s = g_new0(VirtIOBlockDataPlane, 1); - s->vdev = vdev; - 
s->conf = conf; -+ s->vq_aio_context = g_new(AioContext *, conf->num_queues); -+ -+ if (conf->iothread_vq_mapping_list) { -+ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -+ conf->num_queues); -+ } else if (conf->iothread) { -+ AioContext *ctx = iothread_get_aio_context(conf->iothread); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } - -- if (conf->iothread) { -- s->iothread = conf->iothread; -- object_ref(OBJECT(s->iothread)); -- s->ctx = iothread_get_aio_context(s->iothread); -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(conf->iothread)); - } else { -- s->ctx = qemu_get_aio_context(); -+ AioContext *ctx = qemu_get_aio_context(); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } - } - - *dataplane = s; -@@ -104,6 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - { - VirtIOBlock *vblk; -+ VirtIOBlkConf *conf = s->conf; - - if (!s) { - return; -@@ -111,9 +160,21 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - - vblk = VIRTIO_BLK(s->vdev); - assert(!vblk->dataplane_started); -- if (s->iothread) { -- object_unref(OBJECT(s->iothread)); -+ -+ if (conf->iothread_vq_mapping_list) { -+ IOThreadVirtQueueMappingList *node; -+ -+ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ object_unref(OBJECT(iothread)); -+ } -+ } -+ -+ if (conf->iothread) { -+ object_unref(OBJECT(conf->iothread)); - } -+ -+ g_free(s->vq_aio_context); - g_free(s); - } - -@@ -177,19 +238,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - trace_virtio_blk_data_plane_start(s); - -- r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); -+ r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], -+ &local_err); - if (r < 0) { - 
error_report_err(local_err); - goto fail_aio_context; - } - -- /* Kick right away to begin processing requests already in vring */ -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- } -- - /* - * These fields must be visible to the IOThread when it processes the - * virtqueue, otherwise it will think dataplane has not started yet. -@@ -206,8 +261,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - if (!blk_in_drain(s->conf->conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; - -- virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ -+ virtio_queue_aio_attach_host_notifier(vq, ctx); - } - } - return 0; -@@ -236,23 +295,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - * - * Context: BH in IOThread - */ --static void virtio_blk_data_plane_stop_bh(void *opaque) -+static void virtio_blk_data_plane_stop_vq_bh(void *opaque) - { -- VirtIOBlockDataPlane *s = opaque; -- unsigned i; -- -- for (i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -+ VirtQueue *vq = opaque; -+ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); - -- virtio_queue_aio_detach_host_notifier(vq, s->ctx); -+ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); - -- /* -- * Test and clear notifier after disabling event, in case poll callback -- * didn't have time to run. -- */ -- virtio_queue_host_notifier_read(host_notifier); -- } -+ /* -+ * Test and clear notifier after disabling event, in case poll callback -+ * didn't have time to run. 
-+ */ -+ virtio_queue_host_notifier_read(host_notifier); - } - - /* Context: QEMU global mutex held */ -@@ -279,7 +333,12 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - trace_virtio_blk_data_plane_stop(s); - - if (!blk_in_drain(s->conf->conf.blk)) { -- aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(s->vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ } - } - - /* -@@ -322,3 +381,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - - s->stopping = false; - } -+ -+void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -+ -+ for (uint16_t i = 0; i < s->conf->num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ -+void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -+ -+ for (uint16_t i = 0; i < s->conf->num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h -index 5e18bb99ae..1a806fe447 100644 ---- a/hw/block/dataplane/virtio-blk.h -+++ b/hw/block/dataplane/virtio-blk.h -@@ -28,4 +28,7 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); - int virtio_blk_data_plane_start(VirtIODevice *vdev); - void virtio_blk_data_plane_stop(VirtIODevice *vdev); - -+void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); -+void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); -+ - #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index ec9ed09a6a..46e73b2c96 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1151,6 +1151,7 @@ static void 
virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - return; - } - } -+ - virtio_blk_handle_vq(s, vq); - } - -@@ -1463,6 +1464,68 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, - return 0; - } - -+static bool -+validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, -+ uint16_t num_queues, Error **errp) -+{ -+ g_autofree unsigned long *vqs = bitmap_new(num_queues); -+ g_autoptr(GHashTable) iothreads = -+ g_hash_table_new(g_str_hash, g_str_equal); -+ -+ for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { -+ const char *name = node->value->iothread; -+ uint16List *vq; -+ -+ if (!iothread_by_id(name)) { -+ error_setg(errp, "IOThread \"%s\" object does not exist", name); -+ return false; -+ } -+ -+ if (!g_hash_table_add(iothreads, (gpointer)name)) { -+ error_setg(errp, -+ "duplicate IOThread name \"%s\" in iothread-vq-mapping", -+ name); -+ return false; -+ } -+ -+ if (node != list) { -+ if (!!node->value->vqs != !!list->value->vqs) { -+ error_setg(errp, "either all items in iothread-vq-mapping " -+ "must have vqs or none of them must have it"); -+ return false; -+ } -+ } -+ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ if (vq->value >= num_queues) { -+ error_setg(errp, "vq index %u for IOThread \"%s\" must be " -+ "less than num_queues %u in iothread-vq-mapping", -+ vq->value, name, num_queues); -+ return false; -+ } -+ -+ if (test_and_set_bit(vq->value, vqs)) { -+ error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " -+ "because it is already assigned", vq->value, name); -+ return false; -+ } -+ } -+ } -+ -+ if (list->value->vqs) { -+ for (uint16_t i = 0; i < num_queues; i++) { -+ if (!test_bit(i, vqs)) { -+ error_setg(errp, -+ "missing vq %u IOThread assignment in iothread-vq-mapping", -+ i); -+ return false; -+ } -+ } -+ } -+ -+ return true; -+} -+ - static void virtio_resize_cb(void *opaque) - { - VirtIODevice *vdev = opaque; -@@ -1487,34 +1550,24 @@ static void 
virtio_blk_resize(void *opaque) - static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; -- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); -- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); - - if (!s->dataplane || !s->dataplane_started) { - return; - } - -- for (uint16_t i = 0; i < s->conf.num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_detach_host_notifier(vq, ctx); -- } -+ virtio_blk_data_plane_detach(s->dataplane); - } - - /* Resume virtqueue ioeventfd processing after drain */ - static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; -- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); -- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); - - if (!s->dataplane || !s->dataplane_started) { - return; - } - -- for (uint16_t i = 0; i < s->conf.num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -+ virtio_blk_data_plane_attach(s->dataplane); - } - - static const BlockDevOps virtio_block_ops = { -@@ -1600,6 +1653,19 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - return; - } - -+ if (conf->iothread_vq_mapping_list) { -+ if (conf->iothread) { -+ error_setg(errp, "iothread and iothread-vq-mapping properties " -+ "cannot be set at the same time"); -+ return; -+ } -+ -+ if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, -+ conf->num_queues, errp)) { -+ return; -+ } -+ } -+ - s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, - s->host_features); - virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); -@@ -1702,6 +1768,8 @@ static Property virtio_blk_properties[] = { - DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), - DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, - IOThread *), -+ DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, -+ conf.iothread_vq_mapping_list), - 
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, - VIRTIO_BLK_F_DISCARD, true), - DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index 9881009c22..5e4091e4da 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -21,6 +21,7 @@ - #include "sysemu/block-backend.h" - #include "sysemu/block-ram-registrar.h" - #include "qom/object.h" -+#include "qapi/qapi-types-virtio.h" - - #define TYPE_VIRTIO_BLK "virtio-blk-device" - OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK) -@@ -37,6 +38,7 @@ struct VirtIOBlkConf - { - BlockConf conf; - IOThread *iothread; -+ IOThreadVirtQueueMappingList *iothread_vq_mapping_list; - char *serial; - uint32_t request_merging; - uint16_t num_queues; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch deleted file mode 100644 index 31e83a2..0000000 --- a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch +++ /dev/null @@ -1,177 +0,0 @@ -From d54e88103aa76f3bf755b3f4308d8ab60367c6ef Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:00:59 -0400 -Subject: [PATCH 074/101] virtio-blk: add lock to protect s->rq - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [5/26] 17dcd5ba18c03e5633a014d8d62d34d8dd7b43bf (kmwolf/centos-qemu-kvm) - -s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock -to protect s->rq and eliminate reliance on the AioContext lock. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 67 +++++++++++++++++++++++----------- - include/hw/virtio/virtio-blk.h | 3 +- - 2 files changed, 47 insertions(+), 23 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index a1f8e15522..ee38e089bc 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - /* Break the link as the next request is going to be parsed from the - * ring again. Otherwise we may end up doing a double completion! */ - req->mr_next = NULL; -- req->next = s->rq; -- s->rq = req; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req->next = s->rq; -+ s->rq = req; -+ } - } else if (action == BLOCK_ERROR_ACTION_REPORT) { - virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - if (acct_failed) { -@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque) - { - VirtIOBlock *s = opaque; - -- VirtIOBlockReq *req = s->rq; -+ VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; - -- s->rq = NULL; -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req = s->rq; -+ s->rq = NULL; -+ } - - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (req) { -@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev) - AioContext *ctx; - VirtIOBlockReq *req; - -+ /* Dataplane has stopped... */ -+ assert(!s->dataplane_started); -+ -+ /* ...but requests may still be in flight. */ - ctx = blk_get_aio_context(s->blk); - aio_context_acquire(ctx); - blk_drain(s->blk); -+ aio_context_release(ctx); - - /* We drop queued requests after blk_drain() because blk_drain() itself can - * produce them. 
*/ -- while (s->rq) { -- req = s->rq; -- s->rq = req->next; -- virtqueue_detach_element(req->vq, &req->elem, 0); -- virtio_blk_free_request(req); -- } -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ while (s->rq) { -+ req = s->rq; -+ s->rq = req->next; - -- aio_context_release(ctx); -+ /* No other threads can access req->vq here */ -+ virtqueue_detach_element(req->vq, &req->elem, 0); -+ -+ virtio_blk_free_request(req); -+ } -+ } - -- assert(!s->dataplane_started); - blk_set_enable_write_cache(s->blk, s->original_wce); - } - -@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); -- VirtIOBlockReq *req = s->rq; - -- while (req) { -- qemu_put_sbyte(f, 1); -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ VirtIOBlockReq *req = s->rq; - -- if (s->conf.num_queues > 1) { -- qemu_put_be32(f, virtio_get_queue_index(req->vq)); -- } -+ while (req) { -+ qemu_put_sbyte(f, 1); - -- qemu_put_virtqueue_element(vdev, f, &req->elem); -- req = req->next; -+ if (s->conf.num_queues > 1) { -+ qemu_put_be32(f, virtio_get_queue_index(req->vq)); -+ } -+ -+ qemu_put_virtqueue_element(vdev, f, &req->elem); -+ req = req->next; -+ } - } -+ - qemu_put_sbyte(f, 0); - } - -@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, - - req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); - virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); -- req->next = s->rq; -- s->rq = req; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req->next = s->rq; -+ s->rq = req; -+ } - } - - return 0; -@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - s->host_features); - virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); - -+ qemu_mutex_init(&s->rq_lock); -+ - s->blk = conf->conf.blk; - s->rq = NULL; - s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; -@@ 
-1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - virtio_del_queue(vdev, i); - } - qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); -+ qemu_mutex_destroy(&s->rq_lock); - blk_ram_registrar_destroy(&s->blk_ram_registrar); - qemu_del_vm_change_state_handler(s->change); - blockdev_mark_auto_del(s->blk); -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index dafec432ce..9881009c22 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -54,7 +54,8 @@ struct VirtIOBlockReq; - struct VirtIOBlock { - VirtIODevice parent_obj; - BlockBackend *blk; -- void *rq; -+ QemuMutex rq_lock; -+ void *rq; /* protected by rq_lock */ - VirtIOBlkConf conf; - unsigned short sector_mask; - bool original_wce; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch deleted file mode 100644 index a7b518d..0000000 --- a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 22730552442003e81c8c508c3e7ebacf647e4e75 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:48 -0500 -Subject: [PATCH 19/22] virtio-blk: always set ioeventfd during startup - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [15/17] 5f7142aeaa54fda41bd5c4fd3222fd8e3e18f370 (stefanha/centos-stream-qemu-kvm) - -When starting ioeventfd it is common practice to set the event notifier -so that the ioeventfd handler is triggered to run immediately. There may -be no requests waiting to be processed, but the idea is that if a -request snuck in then we guarantee that it will be detected. 
- -One scenario where self-triggering the ioeventfd is necessary is when -virtio_blk_handle_output() is called from a vCPU thread before the -VIRTIO Device Status transitions to DRIVER_OK. In that case we need to -self-trigger the ioeventfd so that the kick handled by the vCPU thread -causes the vq AioContext thread to take over handling the request(s). - -Fixes: b6948ab01df0 ("virtio-blk: add iothread-vq-mapping parameter") -Reported-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-7-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d3f6f294aeadd5f88caf0155e4360808c95b3146) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 81de06c9f6..0b9100b746 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1809,14 +1809,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - - /* Get this show started by hooking up our callbacks */ -- if (!blk_in_drain(s->conf.conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; - -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); - -+ if (!blk_in_drain(s->conf.conf.blk)) { - virtio_queue_aio_attach_host_notifier(vq, ctx); - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch deleted file 
mode 100644 index 8d93bf6..0000000 --- a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch +++ /dev/null @@ -1,72 +0,0 @@ -From f62b56c68d50a149a07e15797bf3605e63b2c501 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 22 Jan 2024 12:26:25 -0500 -Subject: [PATCH 4/6] virtio-blk: avoid using ioeventfd state in irqfd - conditional - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 224: virtio-blk: avoid using ioeventfd state in irqfd conditional -RH-Jira: RHEL-15394 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/1] 8f24084669db52457e55e2523b9f56f5560dd6ce (stefanha/centos-stream-qemu-kvm) - -Requests that complete in an IOThread use irqfd to notify the guest -while requests that complete in the main loop thread use the traditional -qdev irq code path. The reason for this conditional is that the irq code -path requires the BQL: - - if (s->ioeventfd_started && !s->ioeventfd_disabled) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); - } - -There is a corner case where the conditional invokes the irq code path -instead of the irqfd code path: - - static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) - { - ... - /* - * Set ->ioeventfd_started to false before draining so that host notifiers - * are not detached/attached anymore. - */ - s->ioeventfd_started = false; - - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf.conf.blk); - -During blk_drain() the conditional produces the wrong result because -ioeventfd_started is false. - -Use qemu_in_iothread() instead of checking the ioeventfd state. 
- -Cc: qemu-stable@nongnu.org -Buglink: https://issues.redhat.com/browse/RHEL-15394 -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240122172625.415386-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit bfa36802d1704fc413c590ebdcc4e5ae0eacf439) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 7fdeaf2d12..2ae2f6a823 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->inhdr_undo); - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); -- if (s->ioeventfd_started && !s->ioeventfd_disabled) { -+ if (qemu_in_iothread()) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch deleted file mode 100644 index be3c7db..0000000 --- a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch +++ /dev/null @@ -1,167 +0,0 @@ -From a2069ff76637365cacf5b96f9427b98a6ca2c9ba Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:01:00 -0400 -Subject: [PATCH 075/101] virtio-blk: don't lock AioContext in the completion - code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [6/26] 3426f62c2156f6967bb4ffbce75a4ff46d3312a3 (kmwolf/centos-qemu-kvm) - -Nothing in the completion code path relies on the AioContext lock -anymore. Virtqueues are only accessed from one thread at any moment and -the s->rq global state is protected by its own lock now. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 34 ++++------------------------------ - 1 file changed, 4 insertions(+), 30 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index ee38e089bc..f5315df042 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) - VirtIOBlock *s = next->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (next) { - VirtIOBlockReq *req = next; - next = req->mr_next; -@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->blk), &req->acct); - virtio_blk_free_request(req); - } -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_flush_complete(void *opaque, int ret) -@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) - VirtIOBlockReq *req = opaque; - VirtIOBlock *s = req->dev; - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -- if (ret) { -- if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { -- goto out; -- } -+ if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) { -+ return; - } - - virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); - block_acct_done(blk_get_stats(s->blk), &req->acct); - virtio_blk_free_request(req); -- --out: -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) -@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) - bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & - ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; - -- 
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -- if (ret) { -- if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { -- goto out; -- } -+ if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { -+ return; - } - - virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); -@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->blk), &req->acct); - } - virtio_blk_free_request(req); -- --out: -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - #ifdef __linux__ -@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status) - virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(ioctl_req); - } - -@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) - { - ZoneCmdData *data = opaque; - VirtIOBlockReq *req = data->req; -- VirtIOBlock *s = req->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); - struct iovec *in_iov = data->in_iov; - unsigned in_num = data->in_num; -@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) - } - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(data->zone_report_data.zones); - g_free(data); - } -@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) - err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; - } - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static 
int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) -@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) - { - ZoneCmdData *data = opaque; - VirtIOBlockReq *req = data->req; -- VirtIOBlock *s = req->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); - int64_t append_sector, n; - uint8_t err_status = VIRTIO_BLK_S_OK; -@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) - trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(data); - } - -@@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, - return 0; - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - return err_status; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch deleted file mode 100644 index c31fcca..0000000 --- a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2816f6ce20c496e21947f215112be34a5cb93606 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:01:01 -0400 -Subject: [PATCH 076/101] virtio-blk: don't lock AioContext in the submission - code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [7/26] e0de2744cb319569ea008334e45ee5fc2ba9b6d7 (kmwolf/centos-qemu-kvm) - -There is no need to acquire the AioContext lock around blk_aio_*() or -blk_get_geometry() anymore. 
I/O plugging (defer_call()) also does not -require the AioContext lock anymore. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index f5315df042..e110f9718b 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - MultiReqBuffer mrb = {}; - bool suppress_notifications = virtio_queue_get_notification(vq); - -- aio_context_acquire(blk_get_aio_context(s->blk)); - defer_call_begin(); - - do { -@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - - defer_call_end(); -- aio_context_release(blk_get_aio_context(s->blk)); - } - - static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) -@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) - s->rq = NULL; - } - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (req) { - VirtIOBlockReq *next = req->next; - if (virtio_blk_handle_request(req, &mrb)) { -@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) - - /* Paired with inc in virtio_blk_dma_restart_cb() */ - blk_dec_in_flight(s->conf.conf.blk); -- -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_dma_restart_cb(void *opaque, bool running, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch deleted file mode 100644 index 3fb8211..0000000 --- a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch +++ /dev/null @@ -1,1009 +0,0 @@ -From d9be1e1f199ee3171455636f32f3ba59b57e9351 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: 
Fri, 19 Jan 2024 08:57:43 -0500 -Subject: [PATCH 14/22] virtio-blk: move dataplane code into virtio-blk.c - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [10/17] ad854c6c7e808da272bd07229e8c915c1ee6f296 (stefanha/centos-stream-qemu-kvm) - -The dataplane code used to be significantly different from the -non-dataplane code and therefore had a separate source file. - -Over time the difference has gotten smaller because the I/O code paths -were unified. Nowadays the distinction between the VirtIOBlock and -VirtIOBlockDataPlane structs is more of an inconvenience that hinders -code simplification. - -Move hw/block/dataplane/virtio-blk.c into hw/block/virtio-blk.c, merging -VirtIOBlockDataPlane's fields into VirtIOBlock. - -hw/block/virtio-blk.c used VirtIOBlock->dataplane to check if -virtio_blk_data_plane_create() was successful. This is not necessary -because ->dataplane_started and ->dataplane_disabled can be used -instead. This patch makes those changes in order to drop -VirtIOBlock->dataplane. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3bcc17f06526754fd675dcf601414442044fa0b6) -Signed-off-by: Stefan Hajnoczi - -Conflicts: - hw/block/dataplane/virtio-blk.c - Downstream is missing commit 0b2675c473f6 ("Rename "QEMU global mutex" - to "BQL" in comments and docs") so the source file still contains old - "QEMU global mutex held" comments instead of the new "BQL held" - phrasing. The code moved into hw/block/virtio-blk.c by this patch uses - the new "BQL held" phrasing so to minimize conflicts in future - backports. Either way, this is not a code change and therefore no risk - in introducing bugs. 
---- - hw/block/dataplane/meson.build | 1 - - hw/block/dataplane/trace-events | 5 - - hw/block/dataplane/trace.h | 1 - - hw/block/dataplane/virtio-blk.c | 404 -------------------------------- - hw/block/dataplane/virtio-blk.h | 34 --- - hw/block/virtio-blk.c | 362 ++++++++++++++++++++++++++-- - include/hw/virtio/virtio-blk.h | 12 +- - meson.build | 1 - - 8 files changed, 357 insertions(+), 463 deletions(-) - delete mode 100644 hw/block/dataplane/trace-events - delete mode 100644 hw/block/dataplane/trace.h - delete mode 100644 hw/block/dataplane/virtio-blk.c - delete mode 100644 hw/block/dataplane/virtio-blk.h - -diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build -index 025b3b061b..11a5eba2f4 100644 ---- a/hw/block/dataplane/meson.build -+++ b/hw/block/dataplane/meson.build -@@ -1,2 +1 @@ --system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) - specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) -diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events -deleted file mode 100644 -index 38fc3e7507..0000000000 ---- a/hw/block/dataplane/trace-events -+++ /dev/null -@@ -1,5 +0,0 @@ --# See docs/devel/tracing.rst for syntax documentation. -- --# virtio-blk.c --virtio_blk_data_plane_start(void *s) "dataplane %p" --virtio_blk_data_plane_stop(void *s) "dataplane %p" -diff --git a/hw/block/dataplane/trace.h b/hw/block/dataplane/trace.h -deleted file mode 100644 -index 240cc59834..0000000000 ---- a/hw/block/dataplane/trace.h -+++ /dev/null -@@ -1 +0,0 @@ --#include "trace/trace-hw_block_dataplane.h" -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -deleted file mode 100644 -index 97a302cf49..0000000000 ---- a/hw/block/dataplane/virtio-blk.c -+++ /dev/null -@@ -1,404 +0,0 @@ --/* -- * Dedicated thread for virtio-blk I/O processing -- * -- * Copyright 2012 IBM, Corp. -- * Copyright 2012 Red Hat, Inc. 
and/or its affiliates -- * -- * Authors: -- * Stefan Hajnoczi -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. -- * -- */ -- --#include "qemu/osdep.h" --#include "qapi/error.h" --#include "trace.h" --#include "qemu/iov.h" --#include "qemu/main-loop.h" --#include "qemu/thread.h" --#include "qemu/error-report.h" --#include "hw/virtio/virtio-blk.h" --#include "virtio-blk.h" --#include "block/aio.h" --#include "hw/virtio/virtio-bus.h" --#include "qom/object_interfaces.h" -- --struct VirtIOBlockDataPlane { -- bool starting; -- bool stopping; -- -- VirtIOBlkConf *conf; -- VirtIODevice *vdev; -- -- /* -- * The AioContext for each virtqueue. The BlockDriverState will use the -- * first element as its AioContext. -- */ -- AioContext **vq_aio_context; --}; -- --/* Raise an interrupt to signal guest, if necessary */ --void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) --{ -- virtio_notify_irqfd(s->vdev, vq); --} -- --/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ --static void --apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -- AioContext **vq_aio_context, uint16_t num_queues) --{ -- IOThreadVirtQueueMappingList *node; -- size_t num_iothreads = 0; -- size_t cur_iothread = 0; -- -- for (node = iothread_vq_mapping_list; node; node = node->next) { -- num_iothreads++; -- } -- -- for (node = iothread_vq_mapping_list; node; node = node->next) { -- IOThread *iothread = iothread_by_id(node->value->iothread); -- AioContext *ctx = iothread_get_aio_context(iothread); -- -- /* Released in virtio_blk_data_plane_destroy() */ -- object_ref(OBJECT(iothread)); -- -- if (node->value->vqs) { -- uint16List *vq; -- -- /* Explicit vq:IOThread assignment */ -- for (vq = node->value->vqs; vq; vq = vq->next) { -- vq_aio_context[vq->value] = ctx; -- } -- } else { -- /* Round-robin vq:IOThread assignment */ -- for (unsigned i = 
cur_iothread; i < num_queues; -- i += num_iothreads) { -- vq_aio_context[i] = ctx; -- } -- } -- -- cur_iothread++; -- } --} -- --/* Context: QEMU global mutex held */ --bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, -- VirtIOBlockDataPlane **dataplane, -- Error **errp) --{ -- VirtIOBlockDataPlane *s; -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- -- *dataplane = NULL; -- -- if (conf->iothread || conf->iothread_vq_mapping_list) { -- if (!k->set_guest_notifiers || !k->ioeventfd_assign) { -- error_setg(errp, -- "device is incompatible with iothread " -- "(transport does not support notifiers)"); -- return false; -- } -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- error_setg(errp, "ioeventfd is required for iothread"); -- return false; -- } -- -- /* If dataplane is (re-)enabled while the guest is running there could -- * be block jobs that can conflict. -- */ -- if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -- error_prepend(errp, "cannot start virtio-blk dataplane: "); -- return false; -- } -- } -- /* Don't try if transport does not support notifiers. 
*/ -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- return false; -- } -- -- s = g_new0(VirtIOBlockDataPlane, 1); -- s->vdev = vdev; -- s->conf = conf; -- s->vq_aio_context = g_new(AioContext *, conf->num_queues); -- -- if (conf->iothread_vq_mapping_list) { -- apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -- conf->num_queues); -- } else if (conf->iothread) { -- AioContext *ctx = iothread_get_aio_context(conf->iothread); -- for (unsigned i = 0; i < conf->num_queues; i++) { -- s->vq_aio_context[i] = ctx; -- } -- -- /* Released in virtio_blk_data_plane_destroy() */ -- object_ref(OBJECT(conf->iothread)); -- } else { -- AioContext *ctx = qemu_get_aio_context(); -- for (unsigned i = 0; i < conf->num_queues; i++) { -- s->vq_aio_context[i] = ctx; -- } -- } -- -- *dataplane = s; -- -- return true; --} -- --/* Context: QEMU global mutex held */ --void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) --{ -- VirtIOBlock *vblk; -- VirtIOBlkConf *conf; -- -- if (!s) { -- return; -- } -- -- vblk = VIRTIO_BLK(s->vdev); -- assert(!vblk->dataplane_started); -- conf = s->conf; -- -- if (conf->iothread_vq_mapping_list) { -- IOThreadVirtQueueMappingList *node; -- -- for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -- IOThread *iothread = iothread_by_id(node->value->iothread); -- object_unref(OBJECT(iothread)); -- } -- } -- -- if (conf->iothread) { -- object_unref(OBJECT(conf->iothread)); -- } -- -- g_free(s->vq_aio_context); -- g_free(s); --} -- --/* Context: QEMU global mutex held */ --int virtio_blk_data_plane_start(VirtIODevice *vdev) --{ -- VirtIOBlock *vblk = VIRTIO_BLK(vdev); -- VirtIOBlockDataPlane *s = vblk->dataplane; -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- unsigned i; -- unsigned nvqs = s->conf->num_queues; -- Error *local_err = NULL; -- int r; -- -- if (vblk->dataplane_started || s->starting) { -- return 0; -- } -- -- s->starting = true; -- -- /* Set 
up guest notifier (irq) */ -- r = k->set_guest_notifiers(qbus->parent, nvqs, true); -- if (r != 0) { -- error_report("virtio-blk failed to set guest notifier (%d), " -- "ensure -accel kvm is set.", r); -- goto fail_guest_notifiers; -- } -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- /* Set up virtqueue notify */ -- for (i = 0; i < nvqs; i++) { -- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); -- if (r != 0) { -- int j = i; -- -- fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); -- while (i--) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- while (j--) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); -- } -- goto fail_host_notifiers; -- } -- } -- -- memory_region_transaction_commit(); -- -- trace_virtio_blk_data_plane_start(s); -- -- r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], -- &local_err); -- if (r < 0) { -- error_report_err(local_err); -- goto fail_aio_context; -- } -- -- /* -- * These fields must be visible to the IOThread when it processes the -- * virtqueue, otherwise it will think dataplane has not started yet. -- * -- * Make sure ->dataplane_started is false when blk_set_aio_context() is -- * called above so that draining does not cause the host notifier to be -- * detached/attached prematurely. 
-- */ -- s->starting = false; -- vblk->dataplane_started = true; -- smp_wmb(); /* paired with aio_notify_accept() on the read side */ -- -- /* Get this show started by hooking up our callbacks */ -- if (!blk_in_drain(s->conf->conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -- } -- return 0; -- -- fail_aio_context: -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } -- fail_host_notifiers: -- k->set_guest_notifiers(qbus->parent, nvqs, false); -- fail_guest_notifiers: -- vblk->dataplane_disabled = true; -- s->starting = false; -- return -ENOSYS; --} -- --/* Stop notifications for new requests from guest. -- * -- * Context: BH in IOThread -- */ --static void virtio_blk_data_plane_stop_vq_bh(void *opaque) --{ -- VirtQueue *vq = opaque; -- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -- -- virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); -- -- /* -- * Test and clear notifier after disabling event, in case poll callback -- * didn't have time to run. 
-- */ -- virtio_queue_host_notifier_read(host_notifier); --} -- --/* Context: QEMU global mutex held */ --void virtio_blk_data_plane_stop(VirtIODevice *vdev) --{ -- VirtIOBlock *vblk = VIRTIO_BLK(vdev); -- VirtIOBlockDataPlane *s = vblk->dataplane; -- BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- unsigned i; -- unsigned nvqs = s->conf->num_queues; -- -- if (!vblk->dataplane_started || s->stopping) { -- return; -- } -- -- /* Better luck next time. */ -- if (vblk->dataplane_disabled) { -- vblk->dataplane_disabled = false; -- vblk->dataplane_started = false; -- return; -- } -- s->stopping = true; -- trace_virtio_blk_data_plane_stop(s); -- -- if (!blk_in_drain(s->conf->conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -- } -- } -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } -- -- /* -- * Set ->dataplane_started to false before draining so that host notifiers -- * are not detached/attached anymore. -- */ -- vblk->dataplane_started = false; -- -- /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ -- blk_drain(s->conf->conf.blk); -- -- /* -- * Try to switch bs back to the QEMU main loop. 
If other users keep the -- * BlockBackend in the iothread, that's ok -- */ -- blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); -- -- /* Clean up guest notifier (irq) */ -- k->set_guest_notifiers(qbus->parent, nvqs, false); -- -- s->stopping = false; --} -- --void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) --{ -- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -- -- for (uint16_t i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -- } --} -- --void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) --{ -- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -- -- for (uint16_t i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -- } --} -diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h -deleted file mode 100644 -index 1a806fe447..0000000000 ---- a/hw/block/dataplane/virtio-blk.h -+++ /dev/null -@@ -1,34 +0,0 @@ --/* -- * Dedicated thread for virtio-blk I/O processing -- * -- * Copyright 2012 IBM, Corp. -- * Copyright 2012 Red Hat, Inc. and/or its affiliates -- * -- * Authors: -- * Stefan Hajnoczi -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. 
-- * -- */ -- --#ifndef HW_DATAPLANE_VIRTIO_BLK_H --#define HW_DATAPLANE_VIRTIO_BLK_H -- --#include "hw/virtio/virtio.h" -- --typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; -- --bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, -- VirtIOBlockDataPlane **dataplane, -- Error **errp); --void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); --void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); -- --int virtio_blk_data_plane_start(VirtIODevice *vdev); --void virtio_blk_data_plane_stop(VirtIODevice *vdev); -- --void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); --void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); -- --#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 46e73b2c96..cb623069f8 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -27,7 +27,6 @@ - #include "sysemu/sysemu.h" - #include "sysemu/runstate.h" - #include "hw/virtio/virtio-blk.h" --#include "dataplane/virtio-blk.h" - #include "scsi/constants.h" - #ifdef __linux__ - # include -@@ -66,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); - if (s->dataplane_started && !s->dataplane_disabled) { -- virtio_blk_data_plane_notify(s->dataplane, req->vq); -+ virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); - } -@@ -1142,7 +1141,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOBlock *s = (VirtIOBlock *)vdev; - -- if (s->dataplane && !s->dataplane_started) { -+ if (!s->dataplane_disabled && !s->dataplane_started) { - /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start - * dataplane here instead of waiting for .set_status(). 
- */ -@@ -1546,16 +1545,34 @@ static void virtio_blk_resize(void *opaque) - aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); - } - -+static void virtio_blk_data_plane_detach(VirtIOBlock *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ -+ for (uint16_t i = 0; i < s->conf.num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ -+static void virtio_blk_data_plane_attach(VirtIOBlock *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ -+ for (uint16_t i = 0; i < s->conf.num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ - /* Suspend virtqueue ioeventfd processing during drain */ - static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (!s->dataplane || !s->dataplane_started) { -- return; -+ if (s->dataplane_started) { -+ virtio_blk_data_plane_detach(s); - } -- -- virtio_blk_data_plane_detach(s->dataplane); - } - - /* Resume virtqueue ioeventfd processing after drain */ -@@ -1563,11 +1580,9 @@ static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (!s->dataplane || !s->dataplane_started) { -- return; -+ if (s->dataplane_started) { -+ virtio_blk_data_plane_attach(s); - } -- -- virtio_blk_data_plane_attach(s->dataplane); - } - - static const BlockDevOps virtio_block_ops = { -@@ -1576,6 +1591,326 @@ static const BlockDevOps virtio_block_ops = { - .drained_end = virtio_blk_drained_end, - }; - -+/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -+static void -+apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -+ AioContext **vq_aio_context, uint16_t num_queues) -+{ -+ IOThreadVirtQueueMappingList *node; -+ size_t num_iothreads = 0; -+ size_t cur_iothread = 0; -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ num_iothreads++; 
-+ } -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ AioContext *ctx = iothread_get_aio_context(iothread); -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(iothread)); -+ -+ if (node->value->vqs) { -+ uint16List *vq; -+ -+ /* Explicit vq:IOThread assignment */ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ vq_aio_context[vq->value] = ctx; -+ } -+ } else { -+ /* Round-robin vq:IOThread assignment */ -+ for (unsigned i = cur_iothread; i < num_queues; -+ i += num_iothreads) { -+ vq_aio_context[i] = ctx; -+ } -+ } -+ -+ cur_iothread++; -+ } -+} -+ -+/* Context: BQL held */ -+static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ VirtIOBlkConf *conf = &s->conf; -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ -+ if (conf->iothread || conf->iothread_vq_mapping_list) { -+ if (!k->set_guest_notifiers || !k->ioeventfd_assign) { -+ error_setg(errp, -+ "device is incompatible with iothread " -+ "(transport does not support notifiers)"); -+ return false; -+ } -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ error_setg(errp, "ioeventfd is required for iothread"); -+ return false; -+ } -+ -+ /* -+ * If dataplane is (re-)enabled while the guest is running there could -+ * be block jobs that can conflict. -+ */ -+ if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -+ error_prepend(errp, "cannot start virtio-blk dataplane: "); -+ return false; -+ } -+ } -+ /* Don't try if transport does not support notifiers. 
*/ -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ s->dataplane_disabled = true; -+ return false; -+ } -+ -+ s->vq_aio_context = g_new(AioContext *, conf->num_queues); -+ -+ if (conf->iothread_vq_mapping_list) { -+ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -+ conf->num_queues); -+ } else if (conf->iothread) { -+ AioContext *ctx = iothread_get_aio_context(conf->iothread); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(conf->iothread)); -+ } else { -+ AioContext *ctx = qemu_get_aio_context(); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } -+ } -+ -+ return true; -+} -+ -+/* Context: BQL held */ -+static void virtio_blk_data_plane_destroy(VirtIOBlock *s) -+{ -+ VirtIOBlkConf *conf = &s->conf; -+ -+ assert(!s->dataplane_started); -+ -+ if (conf->iothread_vq_mapping_list) { -+ IOThreadVirtQueueMappingList *node; -+ -+ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ object_unref(OBJECT(iothread)); -+ } -+ } -+ -+ if (conf->iothread) { -+ object_unref(OBJECT(conf->iothread)); -+ } -+ -+ g_free(s->vq_aio_context); -+ s->vq_aio_context = NULL; -+} -+ -+/* Context: BQL held */ -+static int virtio_blk_data_plane_start(VirtIODevice *vdev) -+{ -+ VirtIOBlock *s = VIRTIO_BLK(vdev); -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ unsigned i; -+ unsigned nvqs = s->conf.num_queues; -+ Error *local_err = NULL; -+ int r; -+ -+ if (s->dataplane_started || s->dataplane_starting) { -+ return 0; -+ } -+ -+ s->dataplane_starting = true; -+ -+ /* Set up guest notifier (irq) */ -+ r = k->set_guest_notifiers(qbus->parent, nvqs, true); -+ if (r != 0) { -+ error_report("virtio-blk failed to set guest notifier (%d), " -+ "ensure -accel kvm is set.", r); -+ 
goto fail_guest_notifiers; -+ } -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ /* Set up virtqueue notify */ -+ for (i = 0; i < nvqs; i++) { -+ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); -+ if (r != 0) { -+ int j = i; -+ -+ fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); -+ while (i--) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ while (j--) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); -+ } -+ goto fail_host_notifiers; -+ } -+ } -+ -+ memory_region_transaction_commit(); -+ -+ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], -+ &local_err); -+ if (r < 0) { -+ error_report_err(local_err); -+ goto fail_aio_context; -+ } -+ -+ /* -+ * These fields must be visible to the IOThread when it processes the -+ * virtqueue, otherwise it will think dataplane has not started yet. -+ * -+ * Make sure ->dataplane_started is false when blk_set_aio_context() is -+ * called above so that draining does not cause the host notifier to be -+ * detached/attached prematurely. 
-+ */ -+ s->dataplane_starting = false; -+ s->dataplane_started = true; -+ smp_wmb(); /* paired with aio_notify_accept() on the read side */ -+ -+ /* Get this show started by hooking up our callbacks */ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ -+ virtio_queue_aio_attach_host_notifier(vq, ctx); -+ } -+ } -+ return 0; -+ -+ fail_aio_context: -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -+ } -+ fail_host_notifiers: -+ k->set_guest_notifiers(qbus->parent, nvqs, false); -+ fail_guest_notifiers: -+ s->dataplane_disabled = true; -+ s->dataplane_starting = false; -+ return -ENOSYS; -+} -+ -+/* Stop notifications for new requests from guest. -+ * -+ * Context: BH in IOThread -+ */ -+static void virtio_blk_data_plane_stop_vq_bh(void *opaque) -+{ -+ VirtQueue *vq = opaque; -+ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -+ -+ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); -+ -+ /* -+ * Test and clear notifier after disabling event, in case poll callback -+ * didn't have time to run. -+ */ -+ virtio_queue_host_notifier_read(host_notifier); -+} -+ -+/* Context: BQL held */ -+static void virtio_blk_data_plane_stop(VirtIODevice *vdev) -+{ -+ VirtIOBlock *s = VIRTIO_BLK(vdev); -+ BusState *qbus = qdev_get_parent_bus(DEVICE(s)); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ unsigned i; -+ unsigned nvqs = s->conf.num_queues; -+ -+ if (!s->dataplane_started || s->dataplane_stopping) { -+ return; -+ } -+ -+ /* Better luck next time. 
*/ -+ if (s->dataplane_disabled) { -+ s->dataplane_disabled = false; -+ s->dataplane_started = false; -+ return; -+ } -+ s->dataplane_stopping = true; -+ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ } -+ } -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -+ } -+ -+ /* -+ * Set ->dataplane_started to false before draining so that host notifiers -+ * are not detached/attached anymore. -+ */ -+ s->dataplane_started = false; -+ -+ /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ -+ blk_drain(s->conf.conf.blk); -+ -+ /* -+ * Try to switch bs back to the QEMU main loop. 
If other users keep the -+ * BlockBackend in the iothread, that's ok -+ */ -+ blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); -+ -+ /* Clean up guest notifier (irq) */ -+ k->set_guest_notifiers(qbus->parent, nvqs, false); -+ -+ s->dataplane_stopping = false; -+} -+ - static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -1680,7 +2015,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); -- virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); -+ virtio_blk_data_plane_create(s, &err); - if (err != NULL) { - error_propagate(errp, err); - for (i = 0; i < conf->num_queues; i++) { -@@ -1717,8 +2052,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - - blk_drain(s->blk); - del_boot_device_lchs(dev, "/disk@0,0"); -- virtio_blk_data_plane_destroy(s->dataplane); -- s->dataplane = NULL; -+ virtio_blk_data_plane_destroy(s); - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index 5e4091e4da..fecffdc303 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -50,8 +50,6 @@ struct VirtIOBlkConf - bool x_enable_wce_if_config_wce; - }; - --struct VirtIOBlockDataPlane; -- - struct VirtIOBlockReq; - struct VirtIOBlock { - VirtIODevice parent_obj; -@@ -64,7 +62,15 @@ struct VirtIOBlock { - VMChangeStateEntry *change; - bool dataplane_disabled; - bool dataplane_started; -- struct VirtIOBlockDataPlane *dataplane; -+ bool dataplane_starting; -+ bool dataplane_stopping; -+ -+ /* -+ * The AioContext for each virtqueue. The BlockDriverState will use the -+ * first element as its AioContext. 
-+ */ -+ AioContext **vq_aio_context; -+ - uint64_t host_features; - size_t config_size; - BlockRAMRegistrar blk_ram_registrar; -diff --git a/meson.build b/meson.build -index 6c77d9687d..47c65d0f53 100644 ---- a/meson.build -+++ b/meson.build -@@ -3298,7 +3298,6 @@ if have_system - 'hw/arm', - 'hw/audio', - 'hw/block', -- 'hw/block/dataplane', - 'hw/char', - 'hw/display', - 'hw/dma', --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch deleted file mode 100644 index 5f45b9d..0000000 --- a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 71257c2f320f1511de1e275779cf4b90effc1f02 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:44 -0500 -Subject: [PATCH 15/22] virtio-blk: rename dataplane create/destroy functions - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [11/17] 60e7016d5f3e4e9e89945578279b12f812f85ddf (stefanha/centos-stream-qemu-kvm) - -virtio_blk_data_plane_create() and virtio_blk_data_plane_destroy() are -actually about s->vq_aio_context[] rather than managing -dataplane-specific state. - -As a prerequisite to using s->vq_aio_context[] in all code paths (even -when dataplane is not used), rename these functions to reflect that they -just manage s->vq_aio_context and call them regardless of whether or not -dataplane is in use. - -Note that virtio-blk supports running with -device -virtio-blk-pci,ioevent=off where the vCPU thread enters the device -emulation code. In this mode ioeventfd is not used for virtqueue -processing. However, we still want to initialize s->vq_aio_context[] to -qemu_aio_context in that case since I/O completion callbacks will be -invoked in the main loop thread. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 57bc2658935778d1ae0edbcd4402763da8c7bae2) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index cb623069f8..4d6f9377c6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1608,7 +1608,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - IOThread *iothread = iothread_by_id(node->value->iothread); - AioContext *ctx = iothread_get_aio_context(iothread); - -- /* Released in virtio_blk_data_plane_destroy() */ -+ /* Released in virtio_blk_vq_aio_context_cleanup() */ - object_ref(OBJECT(iothread)); - - if (node->value->vqs) { -@@ -1631,7 +1631,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - } - - /* Context: BQL held */ --static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) -+static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - VirtIOBlkConf *conf = &s->conf; -@@ -1659,11 +1659,6 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - return false; - } - } -- /* Don't try if transport does not support notifiers. 
*/ -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- s->dataplane_disabled = true; -- return false; -- } - - s->vq_aio_context = g_new(AioContext *, conf->num_queues); - -@@ -1676,7 +1671,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - s->vq_aio_context[i] = ctx; - } - -- /* Released in virtio_blk_data_plane_destroy() */ -+ /* Released in virtio_blk_vq_aio_context_cleanup() */ - object_ref(OBJECT(conf->iothread)); - } else { - AioContext *ctx = qemu_get_aio_context(); -@@ -1689,7 +1684,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - } - - /* Context: BQL held */ --static void virtio_blk_data_plane_destroy(VirtIOBlock *s) -+static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - { - VirtIOBlkConf *conf = &s->conf; - -@@ -2015,7 +2010,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); -- virtio_blk_data_plane_create(s, &err); -+ -+ /* Don't start dataplane if transport does not support notifiers. 
*/ -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ s->dataplane_disabled = true; -+ } -+ -+ virtio_blk_vq_aio_context_init(s, &err); - if (err != NULL) { - error_propagate(errp, err); - for (i = 0; i < conf->num_queues; i++) { -@@ -2052,7 +2053,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - - blk_drain(s->blk); - del_boot_device_lchs(dev, "/disk@0,0"); -- virtio_blk_data_plane_destroy(s); -+ virtio_blk_vq_aio_context_cleanup(s); - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch deleted file mode 100644 index a0c0b67..0000000 --- a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch +++ /dev/null @@ -1,307 +0,0 @@ -From ba80cdcd5604b9b9efc4682ade9828ab74ebf5e6 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:45 -0500 -Subject: [PATCH 16/22] virtio-blk: rename dataplane to ioeventfd - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [12/17] 4230005e0d1b4629fe4540f1f63cd705e58618da (stefanha/centos-stream-qemu-kvm) - -The dataplane code is really about using ioeventfd. It's used both for -IOThreads (what we think of as dataplane) and for the core virtio-pci -code's ioeventfd feature (which is enabled by default and used when no -IOThread has been specified). Rename the code to reflect this. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3cdaf3dd4a4ca94ebabe7eab23b432f1a6c547cc) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 78 +++++++++++++++++----------------- - include/hw/virtio/virtio-blk.h | 8 ++-- - 2 files changed, 43 insertions(+), 43 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 4d6f9377c6..08c566946a 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->inhdr_undo); - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); -- if (s->dataplane_started && !s->dataplane_disabled) { -+ if (s->ioeventfd_started && !s->ioeventfd_disabled) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); -@@ -1141,12 +1141,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOBlock *s = (VirtIOBlock *)vdev; - -- if (!s->dataplane_disabled && !s->dataplane_started) { -+ if (!s->ioeventfd_disabled && !s->ioeventfd_started) { - /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start -- * dataplane here instead of waiting for .set_status(). -+ * ioeventfd here instead of waiting for .set_status(). - */ - virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_disabled) { -+ if (!s->ioeventfd_disabled) { - return; - } - } -@@ -1213,7 +1213,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) - VirtIOBlockReq *req; - - /* Dataplane has stopped... */ -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - - /* ...but requests may still be in flight. 
*/ - blk_drain(s->blk); -@@ -1380,7 +1380,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - VirtIOBlock *s = VIRTIO_BLK(vdev); - - if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - } - - if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { -@@ -1545,7 +1545,7 @@ static void virtio_blk_resize(void *opaque) - aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); - } - --static void virtio_blk_data_plane_detach(VirtIOBlock *s) -+static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -@@ -1555,7 +1555,7 @@ static void virtio_blk_data_plane_detach(VirtIOBlock *s) - } - } - --static void virtio_blk_data_plane_attach(VirtIOBlock *s) -+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -@@ -1570,8 +1570,8 @@ static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (s->dataplane_started) { -- virtio_blk_data_plane_detach(s); -+ if (s->ioeventfd_started) { -+ virtio_blk_ioeventfd_detach(s); - } - } - -@@ -1580,8 +1580,8 @@ static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (s->dataplane_started) { -- virtio_blk_data_plane_attach(s); -+ if (s->ioeventfd_started) { -+ virtio_blk_ioeventfd_attach(s); - } - } - -@@ -1651,11 +1651,11 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) - } - - /* -- * If dataplane is (re-)enabled while the guest is running there could -+ * If ioeventfd is (re-)enabled while the guest is running there could - * be block jobs that can conflict. 
- */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -- error_prepend(errp, "cannot start virtio-blk dataplane: "); -+ error_prepend(errp, "cannot start virtio-blk ioeventfd: "); - return false; - } - } -@@ -1688,7 +1688,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - { - VirtIOBlkConf *conf = &s->conf; - -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - - if (conf->iothread_vq_mapping_list) { - IOThreadVirtQueueMappingList *node; -@@ -1708,7 +1708,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - } - - /* Context: BQL held */ --static int virtio_blk_data_plane_start(VirtIODevice *vdev) -+static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -@@ -1718,11 +1718,11 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - Error *local_err = NULL; - int r; - -- if (s->dataplane_started || s->dataplane_starting) { -+ if (s->ioeventfd_started || s->ioeventfd_starting) { - return 0; - } - -- s->dataplane_starting = true; -+ s->ioeventfd_starting = true; - - /* Set up guest notifier (irq) */ - r = k->set_guest_notifiers(qbus->parent, nvqs, true); -@@ -1773,14 +1773,14 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - - /* - * These fields must be visible to the IOThread when it processes the -- * virtqueue, otherwise it will think dataplane has not started yet. -+ * virtqueue, otherwise it will think ioeventfd has not started yet. - * -- * Make sure ->dataplane_started is false when blk_set_aio_context() is -+ * Make sure ->ioeventfd_started is false when blk_set_aio_context() is - * called above so that draining does not cause the host notifier to be - * detached/attached prematurely. 
- */ -- s->dataplane_starting = false; -- s->dataplane_started = true; -+ s->ioeventfd_starting = false; -+ s->ioeventfd_started = true; - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - - /* Get this show started by hooking up our callbacks */ -@@ -1812,8 +1812,8 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - fail_host_notifiers: - k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: -- s->dataplane_disabled = true; -- s->dataplane_starting = false; -+ s->ioeventfd_disabled = true; -+ s->ioeventfd_starting = false; - return -ENOSYS; - } - -@@ -1821,7 +1821,7 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - * - * Context: BH in IOThread - */ --static void virtio_blk_data_plane_stop_vq_bh(void *opaque) -+static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) - { - VirtQueue *vq = opaque; - EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -@@ -1836,7 +1836,7 @@ static void virtio_blk_data_plane_stop_vq_bh(void *opaque) - } - - /* Context: BQL held */ --static void virtio_blk_data_plane_stop(VirtIODevice *vdev) -+static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); - BusState *qbus = qdev_get_parent_bus(DEVICE(s)); -@@ -1844,24 +1844,24 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - unsigned i; - unsigned nvqs = s->conf.num_queues; - -- if (!s->dataplane_started || s->dataplane_stopping) { -+ if (!s->ioeventfd_started || s->ioeventfd_stopping) { - return; - } - - /* Better luck next time. 
*/ -- if (s->dataplane_disabled) { -- s->dataplane_disabled = false; -- s->dataplane_started = false; -+ if (s->ioeventfd_disabled) { -+ s->ioeventfd_disabled = false; -+ s->ioeventfd_started = false; - return; - } -- s->dataplane_stopping = true; -+ s->ioeventfd_stopping = true; - - if (!blk_in_drain(s->conf.conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - -- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); - } - } - -@@ -1886,10 +1886,10 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - } - - /* -- * Set ->dataplane_started to false before draining so that host notifiers -+ * Set ->ioeventfd_started to false before draining so that host notifiers - * are not detached/attached anymore. - */ -- s->dataplane_started = false; -+ s->ioeventfd_started = false; - - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf.conf.blk); -@@ -1903,7 +1903,7 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - -- s->dataplane_stopping = false; -+ s->ioeventfd_stopping = false; - } - - static void virtio_blk_device_realize(DeviceState *dev, Error **errp) -@@ -2011,9 +2011,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); - -- /* Don't start dataplane if transport does not support notifiers. */ -+ /* Don't start ioeventfd if transport does not support notifiers. 
*/ - if (!virtio_device_ioeventfd_enabled(vdev)) { -- s->dataplane_disabled = true; -+ s->ioeventfd_disabled = true; - } - - virtio_blk_vq_aio_context_init(s, &err); -@@ -2137,8 +2137,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) - vdc->reset = virtio_blk_reset; - vdc->save = virtio_blk_save_device; - vdc->load = virtio_blk_load_device; -- vdc->start_ioeventfd = virtio_blk_data_plane_start; -- vdc->stop_ioeventfd = virtio_blk_data_plane_stop; -+ vdc->start_ioeventfd = virtio_blk_start_ioeventfd; -+ vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; - } - - static const TypeInfo virtio_blk_info = { -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index fecffdc303..833a9a344f 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -60,10 +60,10 @@ struct VirtIOBlock { - unsigned short sector_mask; - bool original_wce; - VMChangeStateEntry *change; -- bool dataplane_disabled; -- bool dataplane_started; -- bool dataplane_starting; -- bool dataplane_stopping; -+ bool ioeventfd_disabled; -+ bool ioeventfd_started; -+ bool ioeventfd_starting; -+ bool ioeventfd_stopping; - - /* - * The AioContext for each virtqueue. 
The BlockDriverState will use the --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch deleted file mode 100644 index 611b881..0000000 --- a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 9311035821b3fea3f78c7f06ddb8a3861584f907 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:46 -0500 -Subject: [PATCH 17/22] virtio-blk: restart s->rq reqs in vq AioContexts - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [13/17] cf5ad0352a78458ffc7588f967963f62b267fd64 (stefanha/centos-stream-qemu-kvm) - -A virtio-blk device with the iothread-vq-mapping parameter has -per-virtqueue AioContexts. It is not thread-safe to process s->rq -requests in the BlockBackend AioContext since that may be different from -the virtqueue's AioContext to which this request belongs. The code -currently races and could crash. - -Adapt virtio_blk_dma_restart_cb() to first split s->rq into per-vq lists -and then schedule a BH each vq's AioContext as necessary. This way -requests are safely processed in their vq's AioContext. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 71ee0cdd14cc01a8b51aa4e9577dd0a1bb2f8e19) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 44 ++++++++++++++++++++++++++++++++----------- - 1 file changed, 33 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 08c566946a..f48ce5cbb8 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1156,16 +1156,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - - static void virtio_blk_dma_restart_bh(void *opaque) - { -- VirtIOBlock *s = opaque; -+ VirtIOBlockReq *req = opaque; -+ VirtIOBlock *s = req->dev; /* we're called with at least one request */ - -- VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; - -- WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -- req = s->rq; -- s->rq = NULL; -- } -- - while (req) { - VirtIOBlockReq *next = req->next; - if (virtio_blk_handle_request(req, &mrb)) { -@@ -1195,16 +1190,43 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, - RunState state) - { - VirtIOBlock *s = opaque; -+ uint16_t num_queues = s->conf.num_queues; - - if (!running) { - return; - } - -- /* Paired with dec in virtio_blk_dma_restart_bh() */ -- blk_inc_in_flight(s->conf.conf.blk); -+ /* Split the device-wide s->rq request list into per-vq request lists */ -+ g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); -+ VirtIOBlockReq *rq; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ rq = s->rq; -+ s->rq = NULL; -+ } -+ -+ while (rq) { -+ VirtIOBlockReq *next = rq->next; -+ uint16_t idx = virtio_get_queue_index(rq->vq); -+ -+ rq->next = vq_rq[idx]; -+ vq_rq[idx] = rq; -+ rq = next; -+ } - -- aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), -- virtio_blk_dma_restart_bh, s); -+ /* Schedule a BH to submit the requests in each vq's AioContext */ -+ for (uint16_t i = 
0; i < num_queues; i++) { -+ if (!vq_rq[i]) { -+ continue; -+ } -+ -+ /* Paired with dec in virtio_blk_dma_restart_bh() */ -+ blk_inc_in_flight(s->conf.conf.blk); -+ -+ aio_bh_schedule_oneshot(s->vq_aio_context[i], -+ virtio_blk_dma_restart_bh, -+ vq_rq[i]); -+ } - } - - static void virtio_blk_reset(VirtIODevice *vdev) --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch deleted file mode 100644 index 303c007..0000000 --- a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 282cebc22987958d11efc76e4f6ddb9601e709d9 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:47 -0500 -Subject: [PATCH 18/22] virtio-blk: tolerate failure to set BlockBackend - AioContext - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [14/17] edb113ce9fea0c1a88ae7b5d61c35c1981e6993f (stefanha/centos-stream-qemu-kvm) - -We no longer rely on setting the AioContext since the block layer -IO_CODE APIs can be called from any thread. Now it's just a hint to help -block jobs and other operations co-locate themselves in a thread with -the guest I/O requests. Keep going if setting the AioContext fails. 
- -Suggested-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-6-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit ea0736d7f84ead109a6b701427991828f97724c3) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 19 +++++-------------- - 1 file changed, 5 insertions(+), 14 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index f48ce5cbb8..81de06c9f6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1786,11 +1786,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * Try to change the AioContext so that block jobs and other operations can -+ * co-locate their activity in the same AioContext. If it fails, nevermind. -+ */ - r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], - &local_err); - if (r < 0) { -- error_report_err(local_err); -- goto fail_aio_context; -+ warn_report_err(local_err); - } - - /* -@@ -1819,18 +1822,6 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - } - return 0; - -- fail_aio_context: -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } - fail_host_notifiers: - k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch deleted file mode 100644 index 1f70049..0000000 --- a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 5db0b4131c56d96760b3300298f4bedab99d35cb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 6 Sep 2023 17:00:22 +0400 
-Subject: [PATCH 100/101] virtio-gpu: block migration of VMs with blob=true -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 217: virtio-gpu: block migration of VMs with blob=true -RH-Jira: RHEL-7565 -RH-Commit: [1/1] f978ca697d574b1419eb027a1007c060dfb83298 - -JIRA: https://issues.redhat.com/browse/RHEL-7565 - -commit 9c549ab6895a43ad0cb33e684e11cdb0b5400897 -Author: Marc-André Lureau -Date: Wed Sep 6 17:00:22 2023 +0400 - -virtio-gpu: block migration of VMs with blob=true - -"blob" resources don't have an associated pixman image: - -#0 pixman_image_get_stride (image=0x0) at ../pixman/pixman-image.c:921 -#1 0x0000562327c25236 in virtio_gpu_save (f=0x56232bb13b00, opaque=0x56232b555a60, size=0, field=0x5623289ab6c8 <__compound_literal.3+104>, vmdesc=0x56232ab59fe0) at ../hw/display/virtio-gpu.c:1225 - -Related to: -https://bugzilla.redhat.com/show_bug.cgi?id=2236353 - -Signed-off-by: Marc-André Lureau -Acked-by: Peter Xu - -[ rhel backport - fix Error* vs Error** argument ] -Signed-off-by: Marc-André Lureau ---- - hw/display/virtio-gpu.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index b016d3bac8..1702190ead 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -27,6 +27,7 @@ - #include "hw/virtio/virtio-gpu-pixman.h" - #include "hw/virtio/virtio-bus.h" - #include "hw/qdev-properties.h" -+#include "migration/blocker.h" - #include "qemu/log.h" - #include "qemu/module.h" - #include "qapi/error.h" -@@ -41,6 +42,8 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, - - static void virtio_gpu_reset_bh(void *opaque); - -+static Error *blob_mig_blocker; -+ - void virtio_gpu_update_cursor_data(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) -@@ -1452,6 +1455,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) - error_setg(errp, 
"blobs and virgl are not compatible (yet)"); - return; - } -+ -+ if (!blob_mig_blocker) { -+ error_setg(&blob_mig_blocker, -+ "virtio-gpu blob VMs are currently not migratable."); -+ } -+ if (migrate_add_blocker(&blob_mig_blocker, errp)) { -+ return; -+ } - } - - if (!virtio_gpu_base_device_realize(qdev, -@@ -1478,6 +1489,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev) - { - VirtIOGPU *g = VIRTIO_GPU(qdev); - -+ if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { -+ migrate_del_blocker(&blob_mig_blocker); -+ } - g_clear_pointer(&g->ctrl_bh, qemu_bh_delete); - g_clear_pointer(&g->cursor_bh, qemu_bh_delete); - g_clear_pointer(&g->reset_bh, qemu_bh_delete); --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch b/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch deleted file mode 100644 index b1581bf..0000000 --- a/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 523423436e83b01a83c60bc44bd08471992aaff4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 15 Jan 2024 16:34:33 +0400 -Subject: [PATCH 1/4] virtio-gpu: fix scanout migration post-load -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load -RH-Jira: RHEL-36181 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 05e9dbf7f602d62ec2f336a855daf57e6ee8f3f3 - -The current post-loading code for scanout has a FIXME: it doesn't take -the resource region/rect into account. But there is more, when adding -blob migration support in commit f66767f75c9, I didn't realize that blob -resources could be used for scanouts. This situationn leads to a crash -during post-load, as they don't have an associated res->image. 
- -virtio_gpu_do_set_scanout() handle all cases, but requires the -associated virtio_gpu_framebuffer, which is currently not saved during -migration. - -Add a v2 of "virtio-gpu-one-scanout" with the framebuffer fields, so we -can restore blob scanouts, as well as fixing the existing FIXME. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Sebastian Ott -(cherry picked from commit dfcf74fa68c88233209aafc5f82728d0b9a1af5c) ---- - hw/display/virtio-gpu.c | 55 +++++++++++++++++++++++++++------- - include/hw/virtio/virtio-gpu.h | 1 + - 2 files changed, 46 insertions(+), 10 deletions(-) - -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index 1702190ead..bc485d60ea 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -598,6 +598,7 @@ static void virtio_unref_resource(pixman_image_t *image, void *data) - static void virtio_gpu_update_scanout(VirtIOGPU *g, - uint32_t scanout_id, - struct virtio_gpu_simple_resource *res, -+ struct virtio_gpu_framebuffer *fb, - struct virtio_gpu_rect *r) - { - struct virtio_gpu_simple_resource *ores; -@@ -615,9 +616,10 @@ static void virtio_gpu_update_scanout(VirtIOGPU *g, - scanout->y = r->y; - scanout->width = r->width; - scanout->height = r->height; -+ scanout->fb = *fb; - } - --static void virtio_gpu_do_set_scanout(VirtIOGPU *g, -+static bool virtio_gpu_do_set_scanout(VirtIOGPU *g, - uint32_t scanout_id, - struct virtio_gpu_framebuffer *fb, - struct virtio_gpu_simple_resource *res, -@@ -643,7 +645,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, - r->x, r->y, r->width, r->height, - fb->width, fb->height); - *error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -+ return false; - } - - g->parent_obj.enable = 1; -@@ -651,11 +653,12 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, - if (res->blob) { - if (console_has_gl(scanout->con)) { - if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { -- virtio_gpu_update_scanout(g, scanout_id, res, r); -+ virtio_gpu_update_scanout(g, 
scanout_id, res, fb, r); - } else { - *error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -+ return false; - } -- return; -+ return true; - } - - data = res->blob; -@@ -684,7 +687,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, - scanout->ds = qemu_create_displaysurface_pixman(rect); - if (!scanout->ds) { - *error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- return; -+ return false; - } - #ifdef WIN32 - qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); -@@ -695,7 +698,8 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, - scanout->ds); - } - -- virtio_gpu_update_scanout(g, scanout_id, res, r); -+ virtio_gpu_update_scanout(g, scanout_id, res, fb, r); -+ return true; - } - - static void virtio_gpu_set_scanout(VirtIOGPU *g, -@@ -1166,8 +1170,9 @@ static void virtio_gpu_cursor_bh(void *opaque) - - static const VMStateDescription vmstate_virtio_gpu_scanout = { - .name = "virtio-gpu-one-scanout", -- .version_id = 1, -- .fields = (VMStateField[]) { -+ .version_id = 2, -+ .minimum_version_id = 1, -+ .fields = (const VMStateField[]) { - VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), - VMSTATE_UINT32(width, struct virtio_gpu_scanout), - VMSTATE_UINT32(height, struct virtio_gpu_scanout), -@@ -1178,6 +1183,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { - VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), - VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), - VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), -+ VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), -+ VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), -+ VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), -+ VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), -+ VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), -+ VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), - VMSTATE_END_OF_LIST() - }, - }; -@@ -1349,6 +1360,7 @@ static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size, - if 
(!res->blob_size) { - continue; - } -+ assert(!res->image); - qemu_put_be32(f, res->resource_id); - qemu_put_be32(f, res->blob_size); - qemu_put_be32(f, res->iov_cnt); -@@ -1411,11 +1423,11 @@ static int virtio_gpu_post_load(void *opaque, int version_id) - int i; - - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { -- /* FIXME: should take scanout.r.{x,y} into account */ - scanout = &g->parent_obj.scanout[i]; - if (!scanout->resource_id) { - continue; - } -+ - res = virtio_gpu_find_resource(g, scanout->resource_id); - if (!res) { - return -EINVAL; -@@ -1428,7 +1440,30 @@ static int virtio_gpu_post_load(void *opaque, int version_id) - qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); - #endif - -- dpy_gfx_replace_surface(scanout->con, scanout->ds); -+ if (scanout->fb.format != 0) { -+ uint32_t error = 0; -+ struct virtio_gpu_rect r = { -+ .x = scanout->x, -+ .y = scanout->y, -+ .width = scanout->width, -+ .height = scanout->height -+ }; -+ -+ if (!virtio_gpu_do_set_scanout(g, i, &scanout->fb, res, &r, &error)) { -+ return -EINVAL; -+ } -+ } else { -+ /* legacy v1 migration support */ -+ if (!res->image) { -+ return -EINVAL; -+ } -+ scanout->ds = qemu_create_displaysurface_pixman(res->image); -+#ifdef WIN32 -+ qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); -+#endif -+ dpy_gfx_replace_surface(scanout->con, scanout->ds); -+ } -+ - dpy_gfx_update_full(scanout->con); - if (scanout->cursor.resource_id) { - update_cursor(g, &scanout->cursor); -diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h -index 584ba2ed73..9a13afb34e 100644 ---- a/include/hw/virtio/virtio-gpu.h -+++ b/include/hw/virtio/virtio-gpu.h -@@ -81,6 +81,7 @@ struct virtio_gpu_scanout { - uint32_t resource_id; - struct virtio_gpu_update_cursor cursor; - QEMUCursor *current_cursor; -+ struct virtio_gpu_framebuffer fb; - }; - - struct virtio_gpu_requested_state { --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch 
b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch index 028b23b..7183a65 100644 --- a/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch +++ b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch @@ -1,17 +1,17 @@ -From 7741a7d1c1d719ff681f73a023f27d5951b98882 Mon Sep 17 00:00:00 2001 +From 97d039841728570b54d11ee7e5322743f519d861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 16 May 2024 12:40:22 +0400 -Subject: [PATCH 2/4] virtio-gpu: fix v2 migration +Subject: [PATCH 3/4] virtio-gpu: fix v2 migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Marc-André Lureau -RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load -RH-Jira: RHEL-36181 +RH-MergeRequest: 246: virtio-gpu: fix v2 migration +RH-Jira: RHEL-34621 RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 482c37c10ac7e5e8a6c36ac2eb806ac02beeefdc +RH-Acked-by: Thomas Huth +RH-Commit: [1/2] 187370bc6198a4ed0e4763314f8113ffcd21eb36 (marcandre.lureau-rh/qemu-kvm-centos) Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke forward/backward version migration. 
Versioning of nested VMSD structures @@ -27,7 +27,7 @@ Tested-by: Fiona Ebner [fixed long lines] Signed-off-by: Fabiano Rosas -[ Move the hw-compat from 8.2 to 8.1 ] +Jira: https://issues.redhat.com/browse/RHEL-34621 Signed-off-by: Marc-André Lureau (cherry picked from commit 40a23ef643664b5c1021a9789f9d680b6294fb50) --- @@ -37,22 +37,22 @@ Signed-off-by: Marc-André Lureau 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c -index 309f6ba685..a0843ab93e 100644 +index 0f256d9633..cf1d7faaaf 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_1[] = { - { "ramfb", "x-migrate", "off" }, - { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, - { "igb", "x-pcie-flr-init", "off" }, +@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_2[] = { + { "migration", "zero-page-detection", "legacy"}, + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, + { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, }; - const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); + const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index bc485d60ea..9d35a016ae 100644 +index ae831b6b3e..d60b1b2973 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c -@@ -1168,10 +1168,17 @@ static void virtio_gpu_cursor_bh(void *opaque) +@@ -1166,10 +1166,17 @@ static void virtio_gpu_cursor_bh(void *opaque) virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq); } @@ -72,7 +72,7 @@ index bc485d60ea..9d35a016ae 100644 .fields = (const VMStateField[]) { VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), VMSTATE_UINT32(width, struct virtio_gpu_scanout), -@@ -1183,12 +1190,18 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { +@@ -1181,12 +1188,18 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { VMSTATE_UINT32(cursor.hot_y, struct 
virtio_gpu_scanout), VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), @@ -97,7 +97,7 @@ index bc485d60ea..9d35a016ae 100644 VMSTATE_END_OF_LIST() }, }; -@@ -1668,6 +1681,7 @@ static Property virtio_gpu_properties[] = { +@@ -1659,6 +1672,7 @@ static Property virtio_gpu_properties[] = { DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, VIRTIO_GPU_FLAG_BLOB_ENABLED, false), DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), @@ -106,7 +106,7 @@ index bc485d60ea..9d35a016ae 100644 }; diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h -index 9a13afb34e..18500432c6 100644 +index ed44cdad6b..842315d51d 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -177,6 +177,7 @@ typedef struct VGPUDMABuf { diff --git a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch deleted file mode 100644 index 6ad1c98..0000000 --- a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 94bccae527f1ab8328cc7692532046d700e2ca71 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Mon, 5 Feb 2024 19:27:07 +0100 -Subject: [PATCH 22/22] virtio-mem: default-enable "dynamic-memslots" - -RH-Author: David Hildenbrand -RH-MergeRequest: 220: virtio-mem: default-enable "dynamic-memslots" -RH-Jira: RHEL-24045 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] d9a60acd7de1d8703ea3ca938e388e19f31f5347 - -JIRA: https://issues.redhat.com/browse/RHEL-24045 -Upstream: RHEL only - -We only support selected vhost-user devices in combination with -virtio-mem in RHEL. One devices that works well is virtiofsd, devices that -are currently incompatible include DPDK and SPDK. 
- -The vhost devices we support must be compatible with the dynamic-memslot -feature (i.e., support at least 509 memslots, support dynamically adding/ -removing memslots), such that setting "dynamic-memslots=on" will work a -expected and not make certain QEMU commandlines or hotplug of vhost-user -devices bail out. - -Let's set "dynamic-memslots=on" starting with RHEL 9.4, so we -get the benefits (i.e., reduced metadata consumption in KVM, majority of -unplugged memory being inaccessible) as default. - -When wanting to run virtio-mem with incompatible vhost-user devices, it -might just work (if the vhost-user device is created before the -virtio-mem device), or the feature can be manually disabled by -specifying "dynamic-memslots=off". - -Signed-off-by: David Hildenbrand ---- - hw/core/machine.c | 2 ++ - hw/virtio/virtio-mem.c | 3 ++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 446601ee30..309f6ba685 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -78,6 +78,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { - { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, - /* hw_compat_rhel_9_4 from hw_compat_8_1 */ - { "igb", "x-pcie-flr-init", "off" }, -+ /* hw_compat_rhel_9_4 jira RHEL-24045 */ -+ { "virtio-mem", "dynamic-memslots", "off" }, - }; - const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); - -diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c -index 75ee38aa46..00ca91e8fe 100644 ---- a/hw/virtio/virtio-mem.c -+++ b/hw/virtio/virtio-mem.c -@@ -1696,8 +1696,9 @@ static Property virtio_mem_properties[] = { - #endif - DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, - early_migration, true), -+ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ - DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, -- dynamic_memslots, false), -+ dynamic_memslots, true), - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git 
a/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch b/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch deleted file mode 100644 index 4128907..0000000 --- a/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch +++ /dev/null @@ -1,162 +0,0 @@ -From ccd8ffa5cd7f9bcfddeda7a9fa1ad86d4bad870e Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Fri, 12 Apr 2024 14:26:55 +0800 -Subject: [PATCH] virtio-pci: fix use of a released vector -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 366: virtio-pci: fix use of a released vector -RH-Jira: RHEL-32837 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Jason Wang -RH-Commit: [1/1] edd26ffb8727635310aed42e42925afe87df2287 - -During the booting process of the non-standard image, the behavior of the -called function in qemu is as follows: - -1. vhost_net_stop() was triggered by guest image. This will call the function -virtio_pci_set_guest_notifiers() with assgin= false, -virtio_pci_set_guest_notifiers() will release the irqfd for vector 0 - -2. virtio_reset() was triggered, this will set configure vector to VIRTIO_NO_VECTOR - -3.vhost_net_start() was called (at this time, the configure vector is -still VIRTIO_NO_VECTOR) and then call virtio_pci_set_guest_notifiers() with -assgin=true, so the irqfd for vector 0 is still not "init" during this process - -4. The system continues to boot and sets the vector back to 0. After that -msix_fire_vector_notifier() was triggered to unmask the vector 0 and meet the crash - -To fix the issue, we need to support changing the vector after VIRTIO_CONFIG_S_DRIVER_OK is set. 
- -(gdb) bt -0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) - at pthread_kill.c:44 -1 0x00007fc87148ec53 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 -2 0x00007fc87143e956 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 -3 0x00007fc8714287f4 in __GI_abort () at abort.c:79 -4 0x00007fc87142871b in __assert_fail_base - (fmt=0x7fc8715bbde0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=) at assert.c:92 -5 0x00007fc871437536 in __GI___assert_fail - (assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=0x5606413f06f0 <__PRETTY_FUNCTION__.19> "kvm_irqchip_commit_routes") at assert.c:101 -6 0x0000560640f884b5 in kvm_irqchip_commit_routes (s=0x560642cae1f0) at ../accel/kvm/kvm-all.c:1837 -7 0x0000560640c98f8e in virtio_pci_one_vector_unmask - (proxy=0x560643c65f00, queue_no=4294967295, vector=0, msg=..., n=0x560643c6e4c8) - at ../hw/virtio/virtio-pci.c:1005 -8 0x0000560640c99201 in virtio_pci_vector_unmask (dev=0x560643c65f00, vector=0, msg=...) - at ../hw/virtio/virtio-pci.c:1070 -9 0x0000560640bc402e in msix_fire_vector_notifier (dev=0x560643c65f00, vector=0, is_masked=false) - at ../hw/pci/msix.c:120 -10 0x0000560640bc40f1 in msix_handle_mask_update (dev=0x560643c65f00, vector=0, was_masked=true) - at ../hw/pci/msix.c:140 -11 0x0000560640bc4503 in msix_table_mmio_write (opaque=0x560643c65f00, addr=12, val=0, size=4) - at ../hw/pci/msix.c:231 -12 0x0000560640f26d83 in memory_region_write_accessor - (mr=0x560643c66540, addr=12, value=0x7fc86b7bc628, size=4, shift=0, mask=4294967295, attrs=...) - at ../system/memory.c:497 -13 0x0000560640f270a6 in access_with_adjusted_size - - (addr=12, value=0x7fc86b7bc628, size=4, access_size_min=1, access_size_max=4, access_fn=0x560640f26c8d , mr=0x560643c66540, attrs=...) 
at ../system/memory.c:573 -14 0x0000560640f2a2b5 in memory_region_dispatch_write (mr=0x560643c66540, addr=12, data=0, op=MO_32, attrs=...) - at ../system/memory.c:1521 -15 0x0000560640f37bac in flatview_write_continue - (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., ptr=0x7fc871e9c028, len=4, addr1=12, l=4, mr=0x560643c66540) - at ../system/physmem.c:2714 -16 0x0000560640f37d0f in flatview_write - (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) at ../system/physmem.c:2756 -17 0x0000560640f380bf in address_space_write - (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) - at ../system/physmem.c:2863 -18 0x0000560640f3812c in address_space_rw - (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4, is_write=true) at ../system/physmem.c:2873 ---Type for more, q to quit, c to continue without paging-- -19 0x0000560640f8aa55 in kvm_cpu_exec (cpu=0x560642f205e0) at ../accel/kvm/kvm-all.c:2915 -20 0x0000560640f8d731 in kvm_vcpu_thread_fn (arg=0x560642f205e0) at ../accel/kvm/kvm-accel-ops.c:51 -21 0x00005606411949f4 in qemu_thread_start (args=0x560642f292b0) at ../util/qemu-thread-posix.c:541 -22 0x00007fc87148cdcd in start_thread (arg=) at pthread_create.c:442 -23 0x00007fc871512630 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 -(gdb) - -MST: coding style and typo fixups - -Fixes: f9a09ca3ea ("vhost: add support for configure interrupt") -Cc: qemu-stable@nongnu.org -Signed-off-by: Cindy Lu -Message-ID: <2321ade5f601367efe7380c04e3f61379c59b48f.1713173550.git.mst@redhat.com> -Cc: Lei Yang -Cc: Jason Wang -Signed-off-by: Michael S. 
Tsirkin -Tested-by: Cindy Lu -(cherry picked from commit 2ce6cff94df2650c460f809e5ad263f1d22507c0) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 37 +++++++++++++++++++++++++++++++++++-- - 1 file changed, 35 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index e433879542..08faefe29a 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1424,6 +1424,38 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, - return offset; - } - -+static void virtio_pci_set_vector(VirtIODevice *vdev, -+ VirtIOPCIProxy *proxy, -+ int queue_no, uint16_t old_vector, -+ uint16_t new_vector) -+{ -+ bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && -+ msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); -+ -+ if (new_vector == old_vector) { -+ return; -+ } -+ -+ /* -+ * If the device uses irqfd and the vector changes after DRIVER_OK is -+ * set, we need to release the old vector and set up the new one. -+ * Otherwise just need to set the new vector on the device. -+ */ -+ if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) { -+ kvm_virtio_pci_vector_release_one(proxy, queue_no); -+ } -+ /* Set the new vector on the device. */ -+ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -+ vdev->config_vector = new_vector; -+ } else { -+ virtio_queue_set_vector(vdev, queue_no, new_vector); -+ } -+ /* If the new vector changed need to set it up. 
*/ -+ if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) { -+ kvm_virtio_pci_vector_use_one(proxy, queue_no); -+ } -+} -+ - int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, - uint8_t bar, uint64_t offset, uint64_t length, - uint8_t id) -@@ -1570,7 +1602,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, - } else { - val = VIRTIO_NO_VECTOR; - } -- vdev->config_vector = val; -+ virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX, -+ vdev->config_vector, val); - break; - case VIRTIO_PCI_COMMON_STATUS: - if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { -@@ -1610,7 +1643,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, - } else { - val = VIRTIO_NO_VECTOR; - } -- virtio_queue_set_vector(vdev, vdev->queue_sel, val); -+ virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val); - break; - case VIRTIO_PCI_COMMON_Q_ENABLE: - if (val == 1) { --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch new file mode 100644 index 0000000..2fba53d --- /dev/null +++ b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch @@ -0,0 +1,49 @@ +From 3dd1412176a8ee6c06b5d41aa00ca49b535d99b7 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Wed, 24 Jul 2024 06:48:59 -0400 +Subject: [PATCH 092/100] virtio-rng: block max-bytes=0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 259: virtio-rng: block max-bytes=0 +RH-Jira: RHEL-50336 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Thomas Huth +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 6d9852cc7cf7fdf49521b6301ceda26e11b1291f (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-50336 + +with max-bytes set to 0, quota is 0 and so device does not work. +block this to avoid user confusion + +Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 024d046bf41b5256adec671085bcee767a6da125) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-rng.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c +index f74efffef7..7cf31da071 100644 +--- a/hw/virtio/virtio-rng.c ++++ b/hw/virtio/virtio-rng.c +@@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) + + /* Workaround: Property parsing does not enforce unsigned integers, + * So this is a hack to reject such numbers. */ +- if (vrng->conf.max_bytes > INT64_MAX) { +- error_setg(errp, "'max-bytes' parameter must be non-negative, " ++ if (vrng->conf.max_bytes == 0 || ++ vrng->conf.max_bytes > INT64_MAX) { ++ error_setg(errp, "'max-bytes' parameter must be positive, " + "and less than 2^63"); + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch deleted file mode 100644 index b8066b2..0000000 --- a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch +++ /dev/null @@ -1,78 +0,0 @@ -From da3a5afa41790ae913d41cfcdc3c6a8731ae3fe8 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:56 +0100 -Subject: [PATCH 1/6] virtio-scsi: Attach event vq notifier with no_poll - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] d29b461a0a4b584af0ee80fb3f9e45c92ea88eb0 (hreitz/qemu-kvm-c-9-s) - -As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi: -don't waste CPU polling the event virtqueue"), we only attach an io_read -notifier for the virtio-scsi event virtqueue instead, and no polling -notifiers. 
During operation, the event virtqueue is typically -non-empty, but none of the buffers are intended to be used immediately. -Instead, they only get used when certain events occur. Therefore, it -makes no sense to continuously poll it when non-empty, because it is -supposed to be and stay non-empty. - -We do this by using virtio_queue_aio_attach_host_notifier_no_poll() -instead of virtio_queue_aio_attach_host_notifier() for the event -virtqueue. - -Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement -BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use -virtio_queue_aio_attach_host_notifier() for all virtqueues, including -the event virtqueue. This can lead to it being polled again, undoing -the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552. - -Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the -event virtqueue. - -Reported-by: Fiona Ebner -Fixes: 766aa2de0f29b657148e04599320d771c36fd126 - ("virtio-scsi: implement BlockDevOps->drained_begin()") -Reviewed-by: Stefan Hajnoczi -Tested-by: Fiona Ebner -Reviewed-by: Fiona Ebner -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-2-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit c42c3833e0cfdf2b80fb3ca410acfd392b6874ab) ---- - hw/scsi/virtio-scsi.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index ca365a70e9..9943186917 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) - static void virtio_scsi_drained_end(SCSIBus *bus) - { - VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); -+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - VirtIODevice *vdev = VIRTIO_DEVICE(s); - uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + - s->parent_obj.conf.num_queues; -@@ -1166,7 +1167,11 @@ static void 
virtio_scsi_drained_end(SCSIBus *bus) - - for (uint32_t i = 0; i < total_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ if (vq == vs->event_vq) { -+ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); -+ } else { -+ virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ } - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch deleted file mode 100644 index 9ad8fdf..0000000 --- a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 1ee3f919a51135a0798a14c734ca80d74d30025d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:57 -0500 -Subject: [PATCH 078/101] virtio-scsi: don't lock AioContext around - virtio_queue_aio_attach_host_notifier() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [9/26] 5e1179e617d05bf765b285ba42393ec1ddbeba28 (kmwolf/centos-qemu-kvm) - -virtio_queue_aio_attach_host_notifier() does not require the AioContext -lock. Stop taking the lock and add an explicit smp_wmb() because we were -relying on the implicit barrier in the AioContext lock before. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231204164259.1515217-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/virtio-scsi-dataplane.c | 8 +------- - 1 file changed, 1 insertion(+), 7 deletions(-) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 1e684beebe..135e23fe54 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -- /* -- * These fields are visible to the IOThread so we rely on implicit barriers -- * in aio_context_acquire() on the write side and aio_notify_accept() on -- * the read side. -- */ - s->dataplane_starting = false; - s->dataplane_started = true; -+ smp_wmb(); /* paired with aio_notify_accept() */ - - if (s->bus.drain_count == 0) { -- aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); - - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); - } -- aio_context_release(s->ctx); - } - return 0; - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch deleted file mode 100644 index 2654cb7..0000000 --- a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch +++ /dev/null @@ -1,173 +0,0 @@ -From c2d7633ead6e19d4b6af5552ca907ae071b8734b Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:19:58 -0500 -Subject: [PATCH 081/101] virtio-scsi: replace AioContext lock with tmf_bh_lock - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [12/26] 
8fb375bfd72a491d47321c78078577071a4e90fb (kmwolf/centos-qemu-kvm) - -Protect the Task Management Function BH state with a lock. The TMF BH -runs in the main loop thread. An IOThread might process a TMF at the -same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh -must be protected by a lock. - -Run TMF request completion in the IOThread using aio_wait_bh_oneshot(). -This avoids more locking to protect the virtqueue and SCSI layer state. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++----------- - include/hw/virtio/virtio-scsi.h | 3 +- - 2 files changed, 43 insertions(+), 22 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 9c751bf296..4f8d35facc 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) - virtio_scsi_free_req(req); - } - -+static void virtio_scsi_complete_req_bh(void *opaque) -+{ -+ VirtIOSCSIReq *req = opaque; -+ -+ virtio_scsi_complete_req(req); -+} -+ -+/* -+ * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop -+ * thread cannot touch the virtqueue since that could race with an IOThread. 
-+ */ -+static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ -+ if (!s->ctx || s->ctx == qemu_get_aio_context()) { -+ /* No need to schedule a BH when there is no IOThread */ -+ virtio_scsi_complete_req(req); -+ } else { -+ /* Run request completion in the IOThread */ -+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req); -+ } -+} -+ - static void virtio_scsi_bad_req(VirtIOSCSIReq *req) - { - virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); -@@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) - - out: - object_unref(OBJECT(d)); -- -- virtio_scsi_acquire(s); -- virtio_scsi_complete_req(req); -- virtio_scsi_release(s); -+ virtio_scsi_complete_req_from_main_loop(req); - } - - /* Some TMFs must be processed from the main loop thread */ -@@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque) - - GLOBAL_STATE_CODE(); - -- virtio_scsi_acquire(s); -+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ QTAILQ_INSERT_TAIL(&reqs, req, next); -+ } - -- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -- QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -- QTAILQ_INSERT_TAIL(&reqs, req, next); -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; - } - -- qemu_bh_delete(s->tmf_bh); -- s->tmf_bh = NULL; -- -- virtio_scsi_release(s); -- - QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { - QTAILQ_REMOVE(&reqs, req, next); - virtio_scsi_do_one_tmf_bh(req); -@@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) - - GLOBAL_STATE_CODE(); - -- virtio_scsi_acquire(s); -- -+ /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */ - if (s->tmf_bh) { - qemu_bh_delete(s->tmf_bh); - s->tmf_bh = NULL; -@@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) - req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; - 
virtio_scsi_complete_req(req); - } -- -- virtio_scsi_release(s); - } - - static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) - { - VirtIOSCSI *s = req->dev; - -- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); -+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { -+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); - -- if (!s->tmf_bh) { -- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -- qemu_bh_schedule(s->tmf_bh); -+ if (!s->tmf_bh) { -+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -+ qemu_bh_schedule(s->tmf_bh); -+ } - } - } - -@@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) - Error *err = NULL; - - QTAILQ_INIT(&s->tmf_bh_list); -+ qemu_mutex_init(&s->tmf_bh_lock); - - virtio_scsi_common_realize(dev, - virtio_scsi_handle_ctrl, -@@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) - - qbus_set_hotplug_handler(BUS(&s->bus), NULL); - virtio_scsi_common_unrealize(dev); -+ qemu_mutex_destroy(&s->tmf_bh_lock); - } - - static Property virtio_scsi_properties[] = { -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 779568ab5d..da8cb928d9 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -85,8 +85,9 @@ struct VirtIOSCSI { - - /* - * TMFs deferred to main loop BH. These fields are protected by -- * virtio_scsi_acquire(). -+ * tmf_bh_lock. 
- */ -+ QemuMutex tmf_bh_lock; - QEMUBH *tmf_bh; - QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; - --- -2.39.3 - diff --git a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch deleted file mode 100644 index a8bf6ac..0000000 --- a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 2932c8de175fadeed4bb7c1024724cbabc53f6d5 Mon Sep 17 00:00:00 2001 -From: Sebastian Ott -Date: Mon, 19 Feb 2024 02:37:27 -0500 -Subject: [PATCH 6/6] x86: rhel 9.2.0 machine type compat fix - -RH-Author: Sebastian Ott -RH-MergeRequest: 342: Draft: x86: rhel 9.2.0 machine type compat fix (RHEL) -RH-Jira: RHEL-17068 -RH-Acked-by: Thomas Huth -RH-Commit: [23/23] 658dda965f34119de300eef26155f47b1b3fa7f1 - -Fix up the compatibility for 9.2.0 and older. - -Signed-off-by: Sebastian Ott's avatarSebastian Ott ---- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 44038391fb..09d02cc91f 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1023,6 +1023,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->enforce_amd_1tb_hole = false; - /* From pc_i440fx_8_0_machine_options() */ - pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; -+ /* From pc_i440fx_8_1_machine_options() */ -+ pcmc->broken_32bit_mem_addr_check = true; - /* Introduced in QEMU 8.2 */ - pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 6387df97c8..c6967e1846 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -759,6 +759,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - - /* From pc_q35_8_0_machine_options() */ - pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; -+ /* From pc_q35_8_1_machine_options() */ -+ pcmc->broken_32bit_mem_addr_check = true; - - compat_props_add(m->compat_props, hw_compat_rhel_9_4, - 
hw_compat_rhel_9_4_len); --- -2.39.3 - diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index ee77f46..5a22a63 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -12,22 +12,18 @@ %endif %global have_usbredir 1 -%global have_spice 1 -%global have_virgl 1 %global have_opengl 1 %global have_fdt 1 %global have_modules_load 0 %global have_memlock_limits 0 # Some of these are not relevant for RHEL, but defining them # makes it easier to sync the dependency list with Fedora -%global have_block_gluster 1 -%global have_block_iscsi 1 %global have_block_rbd 1 -%global have_block_ssh 1 %global enable_werror 1 %global have_clang 1 %global have_safe_stack 0 + %if %{have_clang} %global toolchain clang %ifarch x86_64 @@ -64,9 +60,6 @@ %ifnarch %{ix86} x86_64 aarch64 %global have_usbredir 0 - %global have_spice 0 - %global have_virgl 0 - %global have_opengl 0 %endif %ifnarch s390x @@ -88,9 +81,8 @@ %endif %ifarch x86_64 %global kvm_target x86_64 -%endif -%ifarch aarch64 - %global kvm_target aarch64 +%else + %global have_opengl 0 %endif %ifarch %{power64} %global kvm_target ppc64 @@ -103,77 +95,61 @@ %ifarch ppc %global kvm_target ppc %endif +%ifarch aarch64 + %global kvm_target aarch64 +%endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vhost-vdpa,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring -%if 0%{have_block_gluster} - %global block_drivers_rw_list %{block_drivers_rw_list},gluster -%endif -%if 0%{have_block_iscsi} - %global block_drivers_rw_list %{block_drivers_rw_list},iscsi -%endif -%if 0%{have_block_rbd} - %global block_drivers_rw_list %{block_drivers_rw_list},rbd -%endif +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vhost-vdpa,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring %global block_drivers_ro_list 
vdi,vmdk,vhdx,vpc,https -%if 0%{have_block_ssh} - %global block_drivers_ro_list %{block_drivers_rw_list},ssh -%endif %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" #Versions of various parts: %global requires_all_modules \ -%if %{have_spice} \ -Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ -%endif \ %if %{have_opengl} \ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ Requires: %{name}-ui-egl-headless = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} \ -%ifarch x86_64 aarch64 %{power64} \ -Requires: %{name}-device-display-virtio-gpu-gl = %{epoch}:%{version}-%{release} \ -%endif \ %ifarch s390x \ Requires: %{name}-device-display-virtio-gpu-ccw = %{epoch}:%{version}-%{release} \ %else \ Requires: %{name}-device-display-virtio-gpu-pci = %{epoch}:%{version}-%{release} \ -%ifarch x86_64 aarch64 %{power64} \ -Requires: %{name}-device-display-virtio-gpu-pci-gl = %{epoch}:%{version}-%{release} \ -%endif \ %endif \ %ifarch x86_64 %{power64} \ Requires: %{name}-device-display-virtio-vga = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-vga-gl = %{epoch}:%{version}-%{release} \ -%endif \ -%if %{have_virgl} \ -Requires: %{name}-device-display-vhost-user-gpu = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-block-blkio = %{epoch}:%{version}-%{release} \ -%if %{have_block_gluster} \ -Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ -%endif \ -%if %{have_block_iscsi} \ -Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ -%endif \ -%if %{have_block_rbd} \ -Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ -%endif \ -%if 
%{have_block_ssh} \ -Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} \ -%endif \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} +# Since SPICE is removed from RHEL-9, the following Obsoletes: +# removes {name}-ui-spice for upgrades from RHEL-8 +# The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in +# other words RHEL-9 rebases are done together/before RHEL-8 ones) + +# In addition, we obsolete some block drivers as we no longer support +# them in the default qemu-kvm installation. + +# Note: the ssh driver wasn't removed yet, just disabled, due to late handling + +%global obsoletes_some_modules \ +Obsoletes: %{name}-ui-spice <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-gluster <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-iscsi <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ + + Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 8.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.6.inferit.1 +Version: 9.0.0 +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -210,468 +186,252 @@ Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0016: 0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch -# For RHEL-17168 - Introduce virt-rhel9.4.0 arm-virt machine type [aarch64] -Patch17: kvm-hw-arm-virt-Fix-compats.patch -# For RHEL-19738 - Enable properties allowing to disable high memory regions -Patch18: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch19: kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch20: kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch21: kvm-vfio-container-Switch-to-dma_map-unmap-API.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch22: kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch23: kvm-vfio-common-Move-giommu_list-in-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch24: kvm-vfio-container-Move-space-field-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch25: kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch26: kvm-vfio-container-Move-per-container-device-list-in-bas.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch27: kvm-vfio-container-Convert-functions-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch28: kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch29: kvm-vfio-container-Move-vrdl_list-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch30: kvm-vfio-container-Move-listener-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch31: kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch32: kvm-vfio-container-Move-iova_ranges-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch33: kvm-vfio-container-Implement-attach-detach_device.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch34: kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch35: kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch36: kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch37: kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch38: kvm-backends-iommufd-Introduce-the-iommufd-object.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch39: kvm-util-char_dev-Add-open_cdev.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch40: kvm-vfio-common-return-early-if-space-isn-t-empty.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch41: kvm-vfio-iommufd-Implement-the-iommufd-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 
-Patch42: kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch43: kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch44: kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch45: kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch46: kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch47: kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch48: kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch49: kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch50: kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch51: kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch52: kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch53: kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch54: kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch55: kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch56: kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch57: kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch58: kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch59: kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch60: kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch61: kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch62: kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch63: kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch64: kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch65: kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch66: kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch67: kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch68: kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch69: kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch70: kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch71: kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch72: kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch73: kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch74: kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch75: kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch76: kvm-vfio-container-Replace-basename-with-g_path_get_base.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch77: kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch78: kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch79: kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch80: kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch81: kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch82: kvm-backends-iommufd-Remove-mutex.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch83: kvm-Compile-IOMMUFD-object-on-aarch64.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch84: kvm-Compile-IOMMUFD-on-s390x.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch85: kvm-Compile-IOMMUFD-on-x86_64.patch -# For RHEL-18212 - [RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption -Patch86: kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch87: kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch -# For RHEL-15965 - [qemu-kvm] Remove 
AioContext lock (no response with QMP command block_resize) -Patch88: kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch89: kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch90: kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch91: kvm-virtio-blk-add-lock-to-protect-s-rq.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch92: kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch93: kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch94: kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch95: kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch96: kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch97: kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch98: kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch99: kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch -# For 
RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch100: kvm-tests-remove-aio_context_acquire-tests.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch101: kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch102: kvm-graph-lock-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch103: kvm-block-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch104: kvm-block-remove-bdrv_co_lock.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch105: kvm-scsi-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch106: kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch107: kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch108: kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch109: kvm-scsi-remove-outdated-AioContext-lock-comment.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch110: kvm-job-remove-outdated-AioContext-locking-comments.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch111: kvm-block-remove-outdated-AioContext-locking-comments.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no 
response with QMP command block_resize) -Patch112: kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch113: kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch114: kvm-s390x-pci-refresh-fh-before-disabling-aif.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch115: kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch -# For RHEL-21570 - Critical performance degradation for input devices in virtio vnc session -Patch116: kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch -# For RHEL-7565 - qemu crashed when migrate guest with blob resources enabled -Patch117: kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch -# For RHEL-21293 - [emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument -Patch118: kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch -# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 -Patch119: kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch -# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 -Patch120: kvm-memory-device-reintroduce-memory-region-size-check.patch -# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk -Patch121: kvm-block-backend-Allow-concurrent-context-changes.patch -# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk -Patch122: kvm-scsi-Await-request-purging.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch123: kvm-string-output-visitor-show-structs-as-omitted.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch124: kvm-string-output-visitor-Fix-pseudo-struct-handling.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch125: kvm-qdev-properties-alias-all-object-class-properties.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch126: kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch127: kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch128: kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch129: kvm-iotests-add-filter_qmp_generated_node_ids.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch130: kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch131: kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch132: kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch133: kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch134: kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch135: kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch136: kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch137: kvm-virtio-blk-always-set-ioeventfd-during-startup.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch138: kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch139: kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch -# For RHEL-24045 - QEMU: default-enable dynamically using multiple memslots for virtio-mem -Patch140: kvm-virtio-mem-default-enable-dynamic-memslots.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch141: kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch142: kvm-virtio-Re-enable-notifications-after-drain.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch143: kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch -# For RHEL-15394 - virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space -Patch144: kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch -# For RHEL-24988 - Mark virt-rhel9.{0,2}.0 machine types as deprecated -Patch145: kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch -# For RHEL-17068 - Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64] -Patch146: kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch -# For RHEL-26049 - When max vcpu is greater than or equal to 246, qemu unable to init event notifier -Patch147: kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch -# For RHEL-24614 - [RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest -Patch148: kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch -# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9] -Patch149: kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch -# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer 
dereference in qemu_clipboard_request() [rhel-9] -Patch150: kvm-ui-clipboard-add-asserts-for-update-and-request.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch151: kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch152: kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch153: kvm-Implement-SMBIOS-type-9-v2.6.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch154: kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch155: kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch156: kvm-smbios-get-rid-of-smbios_legacy-global.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch157: kvm-smbios-avoid-mangling-user-provided-tables.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch158: kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch159: kvm-smbios-add-smbios_add_usr_blob_size-helper.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch160: kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch161: kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch162: kvm-smbios-handle-errors-consistently.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch163: 
kvm-smbios-get-rid-of-global-smbios_ep_type.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch164: kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch165: kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch166: kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch167: kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch168: kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch -# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. -Patch169: kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch -# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. -Patch170: kvm-nbd-server-Fix-race-in-draining-the-export.patch -# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. 
-Patch171: kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch -# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information -Patch172: kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch -# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest -Patch173: kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch -# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest -Patch174: kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch -# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest -Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch -# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" -Patch176: kvm-coroutine-cap-per-thread-local-pool-size.patch -# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" -Patch177: kvm-coroutine-reserve-5-000-mappings.patch -# For RHEL-32837 - qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z] -Patch178: kvm-virtio-pci-fix-use-of-a-released-vector.patch -# For RHEL-32990 - qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z] -Patch179: kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch -# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] -Patch180: kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch -# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] -Patch181: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch -# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] -Patch182: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch -# For RHEL-33754 - Qemu hang when quit dst vm after storage 
migration(nbd+tls) [rhel-9.4.z] -Patch183: kvm-iotests-test-NBD-TLS-iothread.patch -# For RHEL-35610 -Patch184: kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch -# For RHEL-35610 -Patch185: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch -# For RHEL-35610 -Patch186: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch -# For RHEL-35610 -Patch187: kvm-block-Parse-filenames-only-when-explicitly-requested.patch -# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] -Patch188: kvm-virtio-gpu-fix-scanout-migration-post-load.patch -# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] -Patch189: kvm-virtio-gpu-fix-v2-migration.patch -# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] -Patch190: kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch -# For RHEL-43261 - qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command [rhel-9.4.z] -Patch191: kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch -# For RHEL-53565 - [RHEL9.4_to_RHEL9.5]When the VM is with only 9.4.0 (q35) machine type, still hit error of virtio-gpu issue [rhel-9.4.z] -Patch192: kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch -# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] -Patch193: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch -# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] -Patch194: 
kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch -# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] -Patch195: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch -# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] -Patch196: kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +Patch0016: 0016-Add-upstream-compatibility-bits.patch +Patch0017: 0017-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-34945 - [aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 'rarray' failed] +Patch18: kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch +# For RHEL-30362 - Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0] +Patch19: kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch20: kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch21: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch22: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch23: kvm-iotests-test-NBD-TLS-iothread.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument +Patch24: kvm-virtio-gpu-fix-v2-migration.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument 
+Patch25: kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch +# For RHEL-42411 - qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command +Patch26: kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch27: kvm-Revert-monitor-use-aio_co_reschedule_self.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch28: kvm-aio-warn-about-iohandler_ctx-special-casing.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping +Patch29: kvm-block-crypto-create-ciphers-on-demand.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping +Patch30: kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch31: kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch32: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch33: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch34: kvm-block-Parse-filenames-only-when-explicitly-requested.patch +# For RHEL-40708 - [RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the 
device is failed to hot-plug again +Patch35: kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch +# For RHEL-39936 - ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU +Patch36: kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch37: kvm-introduce-pc_rhel_9_5_compat.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch38: kvm-target-i386-add-guest-phys-bits-cpu-property.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch39: kvm-kvm-add-support-for-guest-physical-bits.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch40: kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch41: kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch42: kvm-target-i386-Add-new-CPU-model-SierraForest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch43: kvm-target-i386-Export-RFDS-bit-to-guests.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch44: kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch45: kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch46: kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch47: kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch48: kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch49: kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch +# For RHEL-39544 - [QEMU] Add support 
for AMD SEV-SNP to Qemu +Patch50: kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch51: kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch52: kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch53: kvm-linux-headers-update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch54: kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch55: kvm-KVM-track-whether-guest-state-is-encrypted.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch56: kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch57: kvm-target-i386-introduce-x86-confidential-guest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch58: kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch59: kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch60: kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch61: kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch62: kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch63: kvm-kvm-Introduce-support-for-memory_attributes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch64: kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch65: kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch66: kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch67: kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch68: kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch69: kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch70: kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch71: kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch72: kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch73: kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch74: kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch75: kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch76: kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch77: kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch78: kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch79: kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to 
Qemu +Patch80: kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch81: kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch82: kvm-hw-i386-split-x86.c-in-multiple-parts.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch83: kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch84: kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch85: kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch86: kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch87: kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch88: kvm-linux-headers-Update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch89: kvm-update-linux-headers-import-linux-kvm_para.h-header.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch90: kvm-machine-allow-early-use-of-machine_require_guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch91: kvm-i386-sev-Replace-error_report-with-error_setg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch92: kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch93: kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch94: kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP 
to Qemu +Patch95: kvm-i386-sev-Introduce-sev-snp-guest-object.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch96: kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch97: kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch98: kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch99: kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch100: kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch101: kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch102: kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch103: kvm-i386-sev-Add-the-SNP-launch-start-context.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch104: kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch105: kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch106: kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch107: kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch108: kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch109: kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch110: kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch111: kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch112: kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch113: kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch114: kvm-i386-sev-Extract-build_kernel_loader_hashes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch115: kvm-i386-sev-Reorder-struct-declarations.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch116: kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch117: kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch118: kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch119: kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch120: kvm-i386-sev-fix-unreachable-code-coverity-issue.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch121: kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch122: kvm-i386-sev-Return-when-sev_common-is-null.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch123: kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch124: kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch125: 
kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch126: kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch127: kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch +# For RHEL-50336 - Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices +Patch128: kvm-virtio-rng-block-max-bytes-0.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch129: kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch130: kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch131: kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch132: kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch133: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch134: kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch135: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch136: 
kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +# For RHEL-52250 - fsfreeze hooks break on the systems first restorecon +Patch137: kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch138: kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch %if %{have_clang} BuildRequires: clang @@ -703,18 +463,9 @@ BuildRequires: python3-sphinx_rtd_theme BuildRequires: libseccomp-devel >= %{libseccomp_version} # For network block driver BuildRequires: libcurl-devel -%if %{have_block_gluster} -BuildRequires: glusterfs-api-devel -%endif -%if %{have_block_iscsi} -BuildRequires: libiscsi-devel -%endif %if %{have_block_rbd} BuildRequires: librbd-devel %endif -%if %{have_block_ssh} -BuildRequires: libssh-devel -%endif # We need both because the 'stap' binary is probed for by configure BuildRequires: systemtap BuildRequires: systemtap-sdt-devel @@ -754,16 +505,6 @@ BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel BuildRequires: pulseaudio-libs-devel BuildRequires: spice-protocol -%if %{have_spice} -BuildRequires: spice-server-devel -BuildRequires: libcacard-devel -# For smartcard NSS support -BuildRequires: nss-devel -%endif -%if %{have_virgl} -# virgl 3d support -BuildRequires: virglrenderer-devel -%endif BuildRequires: capstone-devel # Requires for qemu-kvm package @@ -783,6 +524,7 @@ hardware for a full system such as a PC and its associated peripherals. 
%package core Summary: %{name} core components +%{obsoletes_some_modules} Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} %ifarch %{ix86} x86_64 @@ -829,6 +571,10 @@ Requires: seabios-bin >= 1.10.2-1 Requires: seavgabios-bin >= 1.12.0-3 Requires: ipxe-roms-qemu >= %{ipxe_version} %endif +# Removal -gl modules as they do not provide any functionality - see bz#2149022 +Obsoletes: %{name}-device-display-virtio-gpu-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-gpu-pci-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-vga-gl <= %{epoch}:%{version} %description common %{name} is an open source virtualizer that provides hardware emulation for @@ -910,26 +656,7 @@ This package provides the additional CURL block driver for QEMU. Install this package if you want to access remote disks over http, https, ftp and other transports provided by the CURL library. -%if %{have_block_gluster} -%package block-gluster -Summary: QEMU Gluster block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description block-gluster -This package provides the additional Gluster block driver for QEMU. - -Install this package if you want to access remote Gluster storage. -%endif - -%if %{have_block_iscsi} -%package block-iscsi -Summary: QEMU iSCSI block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description block-iscsi -This package provides the additional iSCSI block driver for QEMU. - -Install this package if you want to access iSCSI volumes. -%endif %if %{have_block_rbd} %package block-rbd @@ -942,17 +669,6 @@ Install this package if you want to access remote Ceph volumes using the rbd protocol. %endif -%if %{have_block_ssh} -%package block-ssh -Summary: QEMU SSH block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} - -%description block-ssh -This package provides the additional SSH block driver for QEMU. 
- -Install this package if you want to access remote disks using -the Secure Shell (SSH) protocol. -%endif %package audio-pa Summary: QEMU PulseAudio audio driver @@ -960,16 +676,6 @@ Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description audio-pa This package provides the additional PulseAudio audio driver for QEMU. -%if %{have_spice} -%package ui-spice -Summary: QEMU spice support -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%if %{have_opengl} -Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} -%endif -%description ui-spice -This package provides spice support. -%endif %if %{have_opengl} %package ui-opengl @@ -989,13 +695,6 @@ Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} This package provides the additional egl-headless UI for QEMU. %endif -%if %{have_virgl} -%package device-display-vhost-user-gpu -Summary: QEMU QXL display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-vhost-user-gpu -This package provides the vhost-user-gpu display device for QEMU. -%endif %package device-display-virtio-gpu Summary: QEMU virtio-gpu display device @@ -1003,35 +702,20 @@ Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu This package provides the virtio-gpu display device for QEMU. -%ifarch x86_64 aarch64 %{power64} -%package device-display-virtio-gpu-gl -Summary: QEMU virtio-gpu-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-gl -This package provides the virtio-gpu-gl display device for QEMU. 
-%endif - %ifarch s390x %package device-display-virtio-gpu-ccw Summary: QEMU virtio-gpu-ccw display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-ccw This package provides the virtio-gpu-ccw display device for QEMU. %else %package device-display-virtio-gpu-pci Summary: QEMU virtio-gpu-pci display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-pci This package provides the virtio-gpu-pci display device for QEMU. -%ifarch x86_64 aarch64 %{power64} -%package device-display-virtio-gpu-pci-gl -Summary: QEMU virtio-gpu-pci-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-pci-gl -This package provides the virtio-gpu-pci-gl display device for QEMU. -%else -Obsoletes: %{name}-device-display-virtio-gpu-pci-gl <= %{epoch}:%{version} -%endif %endif %ifarch x86_64 %{power64} @@ -1040,11 +724,6 @@ Summary: QEMU virtio-vga display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-vga This package provides the virtio-vga display device for QEMU. -%package device-display-virtio-vga-gl -Summary: QEMU virtio-vga-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-vga-gl -This package provides the virtio-vga-gl display device for QEMU. 
%endif %package device-usb-host @@ -1285,21 +964,12 @@ run_configure \ --enable-fdt=system \ %endif --enable-gio \ -%if %{have_block_gluster} - --enable-glusterfs \ -%endif --enable-gnutls \ --enable-guest-agent \ --enable-iconv \ --enable-kvm \ -%if %{have_block_iscsi} - --enable-libiscsi \ -%endif %if %{have_pmem} --enable-libpmem \ -%endif -%if %{have_block_ssh} - --enable-libssh \ %endif --enable-libusb \ --enable-libudev \ @@ -1327,10 +997,6 @@ run_configure \ --enable-selinux \ --enable-slirp \ --enable-snappy \ -%if %{have_spice} - --enable-smartcard \ - --enable-spice \ -%endif --enable-spice-protocol \ --enable-system \ --enable-tcg \ @@ -1345,9 +1011,6 @@ run_configure \ --enable-vhost-user \ --enable-vhost-user-blk-server \ --enable-vhost-vdpa \ -%if %{have_virgl} - --enable-virglrenderer \ -%endif --enable-vnc \ --enable-png \ --enable-vnc-sasl \ @@ -1393,7 +1056,7 @@ run_configure \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ trace/trace-events-all qemu-kvm-simpletrace.stp -cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm +cp -a qemu-system-%{kvm_target} qemu-kvm %ifarch s390x # Copy the built new images into place for "make check": @@ -1479,7 +1142,7 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -install -m 0755 %{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm +install -m 0755 %{qemu_kvm_build}/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-simpletrace.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1538,6 +1201,7 @@ rm -rf %{buildroot}%{_datadir}/%{name}/qboot.rom rm -rf %{buildroot}%{_datadir}/%{name}/s390-ccw.img rm -rf %{buildroot}%{_datadir}/%{name}/s390-netboot.img rm -rf 
%{buildroot}%{_datadir}/%{name}/hppa-firmware.img +rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware64.img rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb rm -rf %{buildroot}%{_datadir}/%{name}/u-boot-sam460-20100605.bin @@ -1612,6 +1276,16 @@ install -D -m 0644 %{_sourcedir}/bridge.conf %{buildroot}%{_sysconfdir}/%{name}/ install -m 0644 contrib/systemd/qemu-pr-helper.service %{buildroot}%{_unitdir} install -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir} +# We do not support gl display devices so we can remove their modules as they +# do not have expected functionality included. +# +# https://gitlab.com/qemu-project/qemu/-/issues/1352 was filed to stop building these +# modules in case all dependencies are not satisfied. + +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-vga-gl.so + # We need to make the block device modules and other qemu SO files executable # otherwise RPM won't pick up their dependencies. 
chmod +x %{buildroot}%{_libdir}/%{name}/*.so @@ -1623,12 +1297,6 @@ rm -rf %{buildroot}%{qemudocdir}/specs # endif !tools_only %endif -%if %{have_virgl} -# Move vhost-user JSON files to the standard "qemu" directory -mkdir -p %{buildroot}%{_datadir}/qemu -mv %{buildroot}%{_datadir}/%{name}/vhost-user %{buildroot}%{_datadir}/qemu/ -%endif - %check %if !%{tools_only} @@ -1764,42 +1432,29 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp %{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf +%{_datadir}/systemtap/tapset/qemu-img*.stp +%{_datadir}/systemtap/tapset/qemu-io*.stp +%{_datadir}/systemtap/tapset/qemu-nbd*.stp +%{_datadir}/systemtap/tapset/qemu-storage-daemon*.stp %ifarch x86_64 %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so %endif -%if %{have_virgl} -%files device-display-vhost-user-gpu - %{_datadir}/qemu/vhost-user/50-qemu-gpu.json - %{_libexecdir}/vhost-user-gpu -%endif - %files device-display-virtio-gpu %{_libdir}/%{name}/hw-display-virtio-gpu.so -%ifarch x86_64 aarch64 %{power64} -%files device-display-virtio-gpu-gl - %{_libdir}/%{name}/hw-display-virtio-gpu-gl.so -%endif - %ifarch s390x %files device-display-virtio-gpu-ccw %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else %files device-display-virtio-gpu-pci %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so -%ifarch x86_64 aarch64 %{power64} -%files device-display-virtio-gpu-pci-gl - %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so -%endif %endif %ifarch x86_64 %{power64} %files device-display-virtio-vga %{_libdir}/%{name}/hw-display-virtio-vga.so -%files device-display-virtio-vga-gl - %{_libdir}/%{name}/hw-display-virtio-vga-gl.so %endif %files tests @@ -1811,47 +1466,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files block-curl %{_libdir}/%{name}/block-curl.so - -%if %{have_block_gluster} -%files block-gluster - %{_libdir}/%{name}/block-gluster.so -%endif - -%if 
%{have_block_iscsi} -%files block-iscsi - %{_libdir}/qemu-kvm/block-iscsi.so -%endif - %if %{have_block_rbd} %files block-rbd - %{_libdir}/%{name}/block-rbd.so -%endif - -%if %{have_block_ssh} -%files block-ssh - %{_libdir}/qemu-kvm/block-ssh.so +%{_libdir}/%{name}/block-rbd.so %endif - %files audio-pa %{_libdir}/%{name}/audio-pa.so -%if 0%{have_spice} -%files ui-spice - %{_libdir}/qemu-kvm/audio-spice.so - %{_libdir}/qemu-kvm/chardev-spice.so - %{_libdir}/qemu-kvm/ui-spice-core.so - %{_libdir}/qemu-kvm/ui-spice-app.so - %{_libdir}/qemu-kvm/hw-usb-smartcard.so -%ifarch x86_64 aarch64 - %{_libdir}/qemu-kvm/hw-display-qxl.so -%endif -%endif - %if %{have_opengl} %files ui-opengl - %{_libdir}/%{name}/ui-opengl.so +%{_libdir}/%{name}/ui-opengl.so %files ui-egl-headless - %{_libdir}/%{name}/ui-egl-headless.so +%{_libdir}/%{name}/ui-egl-headless.so %endif %files device-usb-host @@ -1872,57 +1498,185 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Thu Aug 15 2024 Miroslav Rezanina - 8.2.0-11.el9_4.6 -- kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch [RHEL-53565] -- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52616] -- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52616] -- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52616] -- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52616] -- Resolves: RHEL-53565 - ([RHEL9.4_to_RHEL9.5]When the VM is with only 9.4.0 (q35) machine type, still hit error of virtio-gpu issue [rhel-9.4.z]) -- Resolves: RHEL-52616 - (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z]) - -* Mon Jul 15 2024 Miroslav Rezanina - 8.2.0-11.el9_4.5 -- kvm-virtio-gpu-fix-scanout-migration-post-load.patch [RHEL-36181] -- kvm-virtio-gpu-fix-v2-migration.patch [RHEL-36181] -- kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch [RHEL-36181] -- 
kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch [RHEL-43261] -- Resolves: RHEL-36181 - ([RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z]) -- Resolves: RHEL-43261 - (qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command [rhel-9.4.z]) - -* Thu Jun 20 2024 Dmitry Samoylik - 8.2.0-11.4.inferit.1 -- SPICE enable - -* Wed Jun 19 2024 Miroslav Rezanina - 8.2.0-11.el9_4.4 -- Fixing CVE-2024-4467 -- Resolves: RHEL-35610 - -* Fri Jun 07 2024 Miroslav Rezanina - 8.2.0-11.el9_4.3 -- kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33754] -- kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33754] -- kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch [RHEL-33754] -- kvm-iotests-test-NBD-TLS-iothread.patch [RHEL-33754] -- Resolves: RHEL-33754 - (Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z]) - -* Thu Jun 06 2024 Sergey Cherevko - 17:8.2.0-11.2.inferit -- Update to 8.2.0-11.el9_4.2 - -* Mon May 20 2024 Miroslav Rezanina - 8.2.0-11.el9_4.2 -- kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch [RHEL-32990] -- Resolves: RHEL-32990 - (qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z]) - -* Wed May 1 2024 Arkady L. 
Shane - 8.2.0-11.inferit -- Added host_cdrom driver to whitelist (INF-741) - -* Thu Apr 18 2024 Miroslav Rezanina - 8.2.0-11.el9_4.1 -- kvm-virtio-pci-fix-use-of-a-released-vector.patch [RHEL-32837] -- Resolves: RHEL-32837 - (qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z]) +* Mon Sep 02 2024 Miroslav Rezanina - 9.0.0-10 +- kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch [RHEL-52617] +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Aug 26 2024 Miroslav Rezanina - 9.0.0-9 +- kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch [RHEL-52250] +- Resolves: RHEL-52250 + (fsfreeze hooks break on the systems first restorecon) + +* Wed Aug 14 2024 Miroslav Rezanina - 9.0.0-8 +- kvm-introduce-pc_rhel_9_5_compat.patch [RHEL-39544] +- kvm-target-i386-add-guest-phys-bits-cpu-property.patch [RHEL-39544] +- kvm-kvm-add-support-for-guest-physical-bits.patch [RHEL-39544] +- kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch [RHEL-39544] +- kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch [RHEL-39544] +- kvm-target-i386-Add-new-CPU-model-SierraForest.patch [RHEL-39544] +- kvm-target-i386-Export-RFDS-bit-to-guests.patch [RHEL-39544] +- kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch [RHEL-39544] +- kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch [RHEL-39544] +- kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch [RHEL-39544] +- kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch [RHEL-39544] +- kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch [RHEL-39544] +- kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch [RHEL-39544] +- 
kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch [RHEL-39544] +- kvm-linux-headers-update-to-current-kvm-next.patch [RHEL-39544] +- kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch [RHEL-39544] +- kvm-KVM-track-whether-guest-state-is-encrypted.patch [RHEL-39544] +- kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch [RHEL-39544] +- kvm-target-i386-introduce-x86-confidential-guest.patch [RHEL-39544] +- kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch [RHEL-39544] +- kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch [RHEL-39544] +- kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch [RHEL-39544] +- kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch [RHEL-39544] +- kvm-kvm-Introduce-support-for-memory_attributes.patch [RHEL-39544] +- kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch [RHEL-39544] +- kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch [RHEL-39544] +- kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch [RHEL-39544] +- kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch [RHEL-39544] +- kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch [RHEL-39544] +- kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch [RHEL-39544] +- kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch [RHEL-39544] +- kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch [RHEL-39544] +- kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch [RHEL-39544] +- kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch [RHEL-39544] +- 
kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch [RHEL-39544] +- kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch [RHEL-39544] +- kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch [RHEL-39544] +- kvm-hw-i386-split-x86.c-in-multiple-parts.patch [RHEL-39544] +- kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch [RHEL-39544] +- kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch [RHEL-39544] +- kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch [RHEL-39544] +- kvm-linux-headers-Update-to-current-kvm-next.patch [RHEL-39544] +- kvm-update-linux-headers-import-linux-kvm_para.h-header.patch [RHEL-39544] +- kvm-machine-allow-early-use-of-machine_require_guest_mem.patch [RHEL-39544] +- kvm-i386-sev-Replace-error_report-with-error_setg.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-snp-guest-object.patch [RHEL-39544] +- kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch [RHEL-39544] +- kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch [RHEL-39544] +- kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch [RHEL-39544] +- kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch [RHEL-39544] +- kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch [RHEL-39544] +- kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch [RHEL-39544] +- kvm-i386-sev-Add-the-SNP-launch-start-context.patch [RHEL-39544] +- 
kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch [RHEL-39544] +- kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch [RHEL-39544] +- kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch [RHEL-39544] +- kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch [RHEL-39544] +- kvm-i386-sev-Extract-build_kernel_loader_hashes.patch [RHEL-39544] +- kvm-i386-sev-Reorder-struct-declarations.patch [RHEL-39544] +- kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch [RHEL-39544] +- kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch [RHEL-39544] +- kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch [RHEL-39544] +- kvm-i386-sev-fix-unreachable-code-coverity-issue.patch [RHEL-39544] +- kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch [RHEL-39544] +- kvm-i386-sev-Return-when-sev_common-is-null.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch [RHEL-39544] +- kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch [RHEL-39544] +- kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch [RHEL-39544] +- kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch [RHEL-39544] +- kvm-virtio-rng-block-max-bytes-0.patch [RHEL-50336] +- kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch [RHEL-50000] +- 
kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch [RHEL-50000] +- kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch [RHEL-50000] +- kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch [RHEL-50000] +- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52617] +- Resolves: RHEL-39544 + ([QEMU] Add support for AMD SEV-SNP to Qemu) +- Resolves: RHEL-50336 + (Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices) +- Resolves: RHEL-50000 + (scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert) +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Jul 15 2024 Miroslav Rezanina - 9.0.0-7 +- kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch [RHEL-40708] +- kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch [RHEL-39936] +- Resolves: RHEL-40708 + ([RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the device is failed to hot-plug again ) +- Resolves: RHEL-39936 + (ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU) + +* Thu Jul 04 2024 Miroslav Rezanina - 9.0.0-6 +- kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch [RHEL-35611] +- kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch [RHEL-35611] +- kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch [RHEL-35611] +- kvm-block-Parse-filenames-only-when-explicitly-requested.patch [RHEL-35611] +- Resolves: RHEL-35611 + (CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5]) + +* Tue Jun 25 2024 Miroslav Rezanina - 9.0.0-5 +- kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch 
[RHEL-42411] +- kvm-Revert-monitor-use-aio_co_reschedule_self.patch [RHEL-34618 RHEL-38697] +- kvm-aio-warn-about-iohandler_ctx-special-casing.patch [RHEL-34618 RHEL-38697] +- kvm-block-crypto-create-ciphers-on-demand.patch [RHEL-36159] +- kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch [RHEL-36159] +- Resolves: RHEL-42411 + (qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command) +- Resolves: RHEL-34618 + (aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize) +- Resolves: RHEL-38697 + (aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize) +- Resolves: RHEL-36159 + (qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping) + +* Mon Jun 17 2024 Miroslav Rezanina - 9.0.0-4 +- kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch [RHEL-33440] +- kvm-iotests-test-NBD-TLS-iothread.patch [RHEL-33440] +- kvm-virtio-gpu-fix-v2-migration.patch [RHEL-34621] +- kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch [RHEL-34621] +- Resolves: RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) +- Resolves: RHEL-34621 + ([RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument) + +* Tue May 21 2024 Miroslav Rezanina - 9.0.0-3 +- kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33440] +- kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33440] +- Resolves: RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) + +* Tue May 07 2024 Miroslav Rezanina - 9.0.0-2 +- kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch [RHEL-34945] +- kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch [RHEL-30362] +- Resolves: RHEL-34945 + ([aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 
'rarray' failed] ) +- Resolves: RHEL-30362 + (Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0]) + +* Wed Apr 24 2024 Miroslav Rezanina - 9.0.0-1 +- Rebase to QEMU 9.0.0 [RHEL-28073] +- Resolves: RHEL-28073 + (Rebase qemu-kvm to QEMU 9.0.0 for RHEL 9.5) * Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 - kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947]